Index: lib/Target/X86/CMakeLists.txt
===================================================================
--- lib/Target/X86/CMakeLists.txt
+++ lib/Target/X86/CMakeLists.txt
@@ -37,6 +37,7 @@
   X86WinEHState.cpp
   X86OptimizeLEAs.cpp
   X86FixupBWInsts.cpp
+  X86WinAllocaExpander.cpp
   )
 
 add_llvm_target(X86CodeGen ${sources})
Index: lib/Target/X86/X86.h
===================================================================
--- lib/Target/X86/X86.h
+++ lib/Target/X86/X86.h
@@ -59,6 +59,9 @@
 /// recalculations.
 FunctionPass *createX86OptimizeLEAs();
 
+/// Return a pass that expands WinAlloca pseudo-instructions.
+FunctionPass *createX86WinAllocaExpander();
+
 /// Return a pass that optimizes the code-size of x86 call sequences. This is
 /// done by replacing esp-relative movs with pushes.
 FunctionPass *createX86CallFrameOptimization();
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -1162,9 +1162,6 @@
     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr *I,
                                            MachineBasicBlock *BB) const;
 
-    MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
-                                              MachineBasicBlock *BB) const;
-
     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr *MI,
                                            MachineBasicBlock *BB) const;
 
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -16610,14 +16610,9 @@
     Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
                                 DAG.getRegister(Vreg, SPTy));
   } else {
-    SDValue Flag;
-    const unsigned Reg = (Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX);
-
-    Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
-    Flag = Chain.getValue(1);
     SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-
-    Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
+    Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Size);
+    MF.getInfo<X86MachineFunctionInfo>()->setHasWinAlloca(true);
 
     const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
     unsigned SPReg = RegInfo->getStackRegister();
@@ -23275,18 +23270,6 @@
 }
 
 MachineBasicBlock *
-X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
-                                        MachineBasicBlock *BB) const {
-  assert(!Subtarget.isTargetMachO());
-  DebugLoc DL = MI->getDebugLoc();
-  MachineInstr *ResumeMI = Subtarget.getFrameLowering()->emitStackProbe(
-      *BB->getParent(), *BB, MI, DL, false);
-  MachineBasicBlock *ResumeBB = ResumeMI->getParent();
-  MI->eraseFromParent(); // The pseudo instruction is gone now.
-  return ResumeBB;
-}
-
-MachineBasicBlock *
 X86TargetLowering::EmitLoweredCatchRet(MachineInstr *MI,
                                        MachineBasicBlock *BB) const {
   MachineFunction *MF = BB->getParent();
@@ -23748,8 +23731,6 @@
   case X86::TLS_base_addr32:
   case X86::TLS_base_addr64:
     return EmitLoweredTLSAddr(MI, BB);
-  case X86::WIN_ALLOCA:
-    return EmitLoweredWinAlloca(MI, BB);
   case X86::CATCHRET:
     return EmitLoweredCatchRet(MI, BB);
   case X86::CATCHPAD:
Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -99,18 +99,6 @@
                          (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
               (implicit EFLAGS)]>;
 
-// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
-// targets. These calls are needed to probe the stack when allocating more than
-// 4k bytes in one go. Touching the stack at 4K increments is necessary to
-// ensure that the guard pages used by the OS virtual memory manager are
-// allocated in correct sequence.
-// The main point of having separate instruction are extra unmodelled effects
-// (compared to ordinary calls) like stack pointer change.
-
-let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
-  def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
-                     "# dynamic stack allocation",
-                     [(X86WinAlloca)]>;
 
 // When using segmented stacks these are lowered into instructions which first
 // check if the current stacklet has enough free memory. If it does, memory is
@@ -132,6 +120,27 @@
                       Requires<[In64BitMode]>;
 }
 
+// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
+// targets. These calls are needed to probe the stack when allocating more than
+// 4k bytes in one go. Touching the stack at 4K increments is necessary to
+// ensure that the guard pages used by the OS virtual memory manager are
+// allocated in correct sequence.
+// The main point of having separate instruction are extra unmodelled effects
+// (compared to ordinary calls) like stack pointer change.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+def WIN_ALLOCA_32 : I<0, Pseudo, (outs), (ins GR32:$size),
+                     "# dynamic stack allocation",
+                     [(X86WinAlloca GR32:$size)]>,
+                     Requires<[NotLP64]>;
+
+let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
+def WIN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size),
+                     "# dynamic stack allocation",
+                     [(X86WinAlloca GR64:$size)]>,
+                     Requires<[In64BitMode]>;
+
+
 //===----------------------------------------------------------------------===//
 // EH Pseudo Instructions
 //
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td
+++ lib/Target/X86/X86InstrInfo.td
@@ -112,6 +112,8 @@
 
 def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
+def SDT_X86WIN_ALLOCA : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+
 def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
 
 def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
@@ -273,8 +275,8 @@
 
 def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
 
-def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
-                          [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA,
+                          [SDNPHasChain, SDNPOutGlue]>;
 
 def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
                           [SDNPHasChain]>;
Index: lib/Target/X86/X86MachineFunctionInfo.h
===================================================================
--- lib/Target/X86/X86MachineFunctionInfo.h
+++ lib/Target/X86/X86MachineFunctionInfo.h
@@ -98,6 +98,9 @@
   /// True if this function uses the red zone.
   bool UsesRedZone = false;
 
+  /// True if this function has WIN_ALLOCA instructions.
+  bool HasWinAlloca = false;
+
 private:
   /// ForwardedMustTailRegParms - A list of virtual and physical registers
   /// that must be forwarded to every musttail call.
@@ -172,6 +175,9 @@
 
   bool getUsesRedZone() const { return UsesRedZone; }
   void setUsesRedZone(bool V) { UsesRedZone = V; }
+
+  bool hasWinAlloca() const { return HasWinAlloca; }
+  void setHasWinAlloca(bool V) { HasWinAlloca = V; }
 };
 
 } // End llvm namespace
Index: lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- lib/Target/X86/X86TargetMachine.cpp
+++ lib/Target/X86/X86TargetMachine.cpp
@@ -266,6 +266,7 @@
 
   addPass(createX86OptimizeLEAs());
   addPass(createX86CallFrameOptimization());
+  addPass(createX86WinAllocaExpander());
 }
 
 void X86PassConfig::addPostRegAlloc() {
Index: lib/Target/X86/X86WinAllocaExpander.cpp
===================================================================
--- /dev/null
+++ lib/Target/X86/X86WinAllocaExpander.cpp
@@ -0,0 +1,298 @@
+//===----- X86WinAllocaExpander.cpp - Expand WinAlloca pseudo instruction -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pass that expands WinAlloca pseudo-instructions.
+//
+// It performs a conservative analysis to determine whether each allocation
+// falls within a region of the stack that is safe to use, or whether stack
+// probes must be emitted.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class X86WinAllocaExpander : public MachineFunctionPass {
+public:
+  X86WinAllocaExpander() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+  /// Strategies for lowering a WinAlloca.
+  enum Lowering { TouchAndSub, Sub, Probe };
+
+  /// Deterministic-order map from WinAlloca instruction to desired lowering.
+  typedef MapVector<MachineInstr*, Lowering> LoweringMap;
+
+  /// Compute which lowering to use for each WinAlloca instruction.
+  void computeLowerings(MachineFunction &MF, LoweringMap& Lowerings);
+
+  /// Get the appropriate lowering based on current offset and amount.
+  Lowering getLowering(int64_t CurrentOffset, int64_t AllocaAmount);
+
+  /// Lower a WinAlloca instruction.
+  void lower(MachineInstr* MI, Lowering L);
+
+  // Per-function state, (re)initialized at the top of runOnMachineFunction.
+  MachineRegisterInfo *MRI;
+  const X86Subtarget *STI;
+  const TargetInstrInfo *TII;
+  const X86RegisterInfo *TRI;
+  unsigned StackPtr;
+  unsigned SlotSize;
+  unsigned StackProbeSize;
+
+  const char *getPassName() const override { return "X86 WinAlloca Expander"; }
+  static char ID;
+};
+
+char X86WinAllocaExpander::ID = 0;
+
+} // end anonymous namespace
+
+FunctionPass *llvm::createX86WinAllocaExpander() {
+  return new X86WinAllocaExpander();
+}
+
+/// Return the allocation amount for a WinAlloca instruction, or -1 if unknown.
+static int64_t getWinAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) {
+  assert(MI->getOpcode() == X86::WIN_ALLOCA_32 ||
+         MI->getOpcode() == X86::WIN_ALLOCA_64);
+  assert(MI->getOperand(0).isReg());
+
+  unsigned AmountReg = MI->getOperand(0).getReg();
+  MachineInstr *Def = MRI->getUniqueVRegDef(AmountReg);
+
+  // Look through copies.
+  while (Def && Def->isCopy() && Def->getOperand(1).isReg())
+    Def = MRI->getUniqueVRegDef(Def->getOperand(1).getReg());
+
+  if (!Def ||
+      (Def->getOpcode() != X86::MOV32ri && Def->getOpcode() != X86::MOV64ri) ||
+      !Def->getOperand(1).isImm())
+    return -1;
+
+  return Def->getOperand(1).getImm();
+}
+
+X86WinAllocaExpander::Lowering
+X86WinAllocaExpander::getLowering(int64_t CurrentOffset,
+                                  int64_t AllocaAmount) {
+  // For a non-constant amount or a large amount, we have to probe.
+  if (AllocaAmount < 0 || AllocaAmount > StackProbeSize)
+    return Probe;
+
+  // If it fits within the safe region of the stack, just subtract.
+  if (CurrentOffset + AllocaAmount <= StackProbeSize)
+    return Sub;
+
+  // Otherwise, touch the current tip of the stack, then subtract.
+  return TouchAndSub;
+}
+
+/// Return true if MI is one of the push or pop opcodes tracked by this pass.
+static bool isPushPop(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case X86::PUSH32i8:
+  case X86::PUSH32r:
+  case X86::PUSH32rmm:
+  case X86::PUSH32rmr:
+  case X86::PUSHi32:
+  case X86::PUSH64i8:
+  case X86::PUSH64r:
+  case X86::PUSH64rmm:
+  case X86::PUSH64rmr:
+  case X86::PUSH64i32:
+  case X86::POP32r:
+  case X86::POP64r:
+    return true;
+  default:
+    return false;
+  }
+}
+
+void X86WinAllocaExpander::computeLowerings(MachineFunction &MF,
+                                            LoweringMap &Lowerings) {
+  // Do a one-pass reverse post-order walk of the CFG to conservatively estimate
+  // the offset between the stack pointer and the lowest touched part of the
+  // stack, and use that to decide how to lower each WinAlloca instruction.
+
+  // Initialize OutOffset[B], the stack offset at exit from B, to something big.
+  DenseMap<MachineBasicBlock *, int64_t> OutOffset;
+  for (MachineBasicBlock &MBB : MF)
+    OutOffset[&MBB] = INT32_MAX;
+
+  // Note: we don't know the offset at the start of the entry block since the
+  // prologue hasn't been inserted yet, and how much that will adjust the stack
+  // pointer depends on register spills, which have not been computed yet.
+
+  // Compute the reverse post-order.
+  ReversePostOrderTraversal<MachineFunction*> RPO(&MF);
+
+  for (MachineBasicBlock *MBB : RPO) {
+    // Take the largest exit offset over all predecessors. If the result is
+    // still the -1 sentinel (e.g. no predecessors), assume the worst.
+    int64_t Offset = -1;
+    for (MachineBasicBlock *Pred : MBB->predecessors())
+      Offset = std::max(Offset, OutOffset[Pred]);
+    if (Offset == -1) Offset = INT32_MAX;
+
+    for (MachineInstr &MI : *MBB) {
+      if (MI.getOpcode() == X86::WIN_ALLOCA_32 ||
+          MI.getOpcode() == X86::WIN_ALLOCA_64) {
+        // A WinAlloca moves StackPtr, and potentially touches it.
+        int64_t Amount = getWinAllocaAmount(&MI, MRI);
+        Lowering L = getLowering(Offset, Amount);
+        Lowerings[&MI] = L;
+        switch (L) {
+        case Sub:
+          Offset += Amount;
+          break;
+        case TouchAndSub:
+          Offset = Amount;
+          break;
+        case Probe:
+          Offset = 0;
+          break;
+        }
+      } else if (MI.isCall() || isPushPop(MI)) {
+        // Calls, pushes and pops touch the tip of the stack.
+        Offset = 0;
+      } else if (MI.getOpcode() == X86::ADJCALLSTACKUP32 ||
+                 MI.getOpcode() == X86::ADJCALLSTACKUP64) {
+        Offset -= MI.getOperand(0).getImm();
+      } else if (MI.getOpcode() == X86::ADJCALLSTACKDOWN32 ||
+                 MI.getOpcode() == X86::ADJCALLSTACKDOWN64) {
+        Offset += MI.getOperand(0).getImm();
+      } else if (MI.modifiesRegister(StackPtr, TRI)) {
+        // Any other modification of SP means we've lost track of it.
+        Offset = INT32_MAX;
+      }
+    }
+
+    OutOffset[MBB] = Offset;
+  }
+}
+
+/// Pick the SUB-immediate opcode: the ri8 form when Amount fits in 8 bits.
+static unsigned getSubOpcode(bool Is64Bit, int64_t Amount) {
+  if (Is64Bit)
+    return isInt<8>(Amount) ? X86::SUB64ri8 : X86::SUB64ri32;
+  return isInt<8>(Amount) ? X86::SUB32ri8 : X86::SUB32ri;
+}
+
+void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
+  DebugLoc DL = MI->getDebugLoc();
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineBasicBlock::iterator I = *MI;
+
+  int64_t Amount = getWinAllocaAmount(MI, MRI);
+  if (Amount == 0) {
+    // Nothing to allocate; just drop the pseudo-instruction.
+    MI->eraseFromParent();
+    return;
+  }
+
+  bool Is64Bit = STI->is64Bit();
+  assert(SlotSize == 4 || SlotSize == 8);
+  unsigned RegA = (SlotSize == 8) ? X86::RAX : X86::EAX;
+
+  if (Amount != -1) {
+    assert((Amount % SlotSize == 0) && "Stack would not be aligned!");
+  }
+
+  switch (L) {
+  case TouchAndSub:
+    assert(Amount >= SlotSize);
+
+    // Use a push to touch the top of the stack.
+    BuildMI(*MBB, I, DL, TII->get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+        .addReg(RegA, RegState::Undef);
+    Amount -= SlotSize;
+    if (!Amount)
+      break;
+
+    // Fall through to make any remaining adjustment.
+  case Sub:
+    assert(Amount > 0);
+    if (Amount == SlotSize) {
+      // Use push to save size.
+      BuildMI(*MBB, I, DL, TII->get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+          .addReg(RegA, RegState::Undef);
+    } else {
+      // Sub.
+      BuildMI(*MBB, I, DL, TII->get(getSubOpcode(Is64Bit, Amount)), StackPtr)
+          .addReg(StackPtr)
+          .addImm(Amount);
+    }
+    break;
+  case Probe:
+    // The probe lowering expects the amount in RAX/EAX.
+    BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY), RegA)
+        .addReg(MI->getOperand(0).getReg());
+
+    // Do the probe.
+    STI->getFrameLowering()->emitStackProbe(*MBB->getParent(), *MBB, MI, DL,
+                                            /*InPrologue=*/false);
+    break;
+  }
+
+  unsigned AmountReg = MI->getOperand(0).getReg();
+  MI->eraseFromParent();
+
+  // Delete the instruction defining AmountReg if it has no remaining uses.
+  // Only the immediate definition is removed; copy chains are not followed.
+  if (MRI->use_empty(AmountReg))
+    if (MachineInstr *AmountDef = MRI->getUniqueVRegDef(AmountReg))
+      AmountDef->eraseFromParent();
+}
+
+bool X86WinAllocaExpander::runOnMachineFunction(MachineFunction &MF) {
+  if (!MF.getInfo<X86MachineFunctionInfo>()->hasWinAlloca())
+    return false;
+
+  MRI = &MF.getRegInfo();
+  STI = &MF.getSubtarget<X86Subtarget>();
+  TII = STI->getInstrInfo();
+  TRI = STI->getRegisterInfo();
+  StackPtr = TRI->getStackRegister();
+  SlotSize = TRI->getSlotSize();
+
+  StackProbeSize = 4096;
+  if (MF.getFunction()->hasFnAttribute("stack-probe-size")) {
+    MF.getFunction()
+        ->getFnAttribute("stack-probe-size")
+        .getValueAsString()
+        .getAsInteger(0, StackProbeSize);
+  }
+
+  LoweringMap Lowerings;
+  computeLowerings(MF, Lowerings);
+  for (auto &P : Lowerings)
+    lower(P.first, P.second);
+
+  return true;
+}
Index: test/CodeGen/X86/cleanuppad-inalloca.ll
===================================================================
--- test/CodeGen/X86/cleanuppad-inalloca.ll
+++ test/CodeGen/X86/cleanuppad-inalloca.ll
@@ -38,8 +38,8 @@
 ; CHECK: pushl %ebp
 ; CHECK: movl %esp, %ebp
 ; CHECK: subl ${{[0-9]+}}, %esp
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
 ; CHECK: calll "??0A@@QAE@XZ"
 ; CHECK: calll "??0A@@QAE@XZ"
 ; CHECK: calll _takes_two
Index: test/CodeGen/X86/dynamic-alloca-in-entry.ll
===================================================================
--- test/CodeGen/X86/dynamic-alloca-in-entry.ll
+++ test/CodeGen/X86/dynamic-alloca-in-entry.ll
@@ -15,5 +15,5 @@
   ret void
 }
 ; CHECK-LABEL: _bar:
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
 ; CHECK: retl
Index: test/CodeGen/X86/inalloca-ctor.ll
===================================================================
--- test/CodeGen/X86/inalloca-ctor.ll
+++ test/CodeGen/X86/inalloca-ctor.ll
@@ -12,8 +12,8 @@
 entry:
   %args = alloca inalloca %frame
   %c = getelementptr %frame, %frame* %args, i32 0, i32 2
-; CHECK: movl $20, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: subl $16, %esp
 ; CHECK: movl %esp,
   call void @Foo_ctor(%Foo* %c)
 ; CHECK: leal 12(%{{.*}}),
Index: test/CodeGen/X86/inalloca-invoke.ll
===================================================================
--- test/CodeGen/X86/inalloca-invoke.ll
+++ test/CodeGen/X86/inalloca-invoke.ll
@@ -21,7 +21,8 @@
   %beg = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 0
   %end = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 1
 
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: subl $20, %esp
 ; CHECK: movl %esp, %[[beg:[^ ]*]]
 ; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]]
 
Index: test/CodeGen/X86/inalloca-stdcall.ll
===================================================================
--- test/CodeGen/X86/inalloca-stdcall.ll
+++ test/CodeGen/X86/inalloca-stdcall.ll
@@ -8,8 +8,8 @@
 define void @g() {
 ; CHECK-LABEL: _g:
   %b = alloca inalloca %Foo
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
   %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
   %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
   store i32 13, i32* %f1
Index: test/CodeGen/X86/inalloca.ll
===================================================================
--- test/CodeGen/X86/inalloca.ll
+++ test/CodeGen/X86/inalloca.ll
@@ -8,8 +8,8 @@
 ; CHECK-LABEL: _a:
 entry:
   %b = alloca inalloca %Foo
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
   %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
   %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
   store i32 13, i32* %f1
@@ -28,8 +28,8 @@
 ; CHECK-LABEL: _b:
 entry:
   %b = alloca inalloca %Foo
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
   %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
   %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
   store i32 13, i32* %f1
@@ -49,8 +49,8 @@
 ; CHECK-LABEL: _c:
 entry:
   %b = alloca inalloca %Foo
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
   %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
   %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
   store i32 13, i32* %f1
Index: test/CodeGen/X86/shrink-wrap-chkstk.ll
===================================================================
--- test/CodeGen/X86/shrink-wrap-chkstk.ll
+++ test/CodeGen/X86/shrink-wrap-chkstk.ll
@@ -7,7 +7,7 @@
 target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "i686-pc-windows-msvc18.0.0"
 
-%struct.S = type { [12 x i8] }
+%struct.S = type { [8192 x i8] }
 
 define x86_thiscallcc void @call_inalloca(i1 %x) {
 entry:
@@ -29,7 +29,7 @@
 ; CHECK-LABEL: _call_inalloca: # @call_inalloca
 ; CHECK: pushl %ebp
 ; CHECK: movl %esp, %ebp
-; CHECK: movl $12, %eax
+; CHECK: movl $8192, %eax
 ; CHECK: calll __chkstk
 ; CHECK: calll _inalloca_params
 ; CHECK: movl %ebp, %esp
Index: test/CodeGen/X86/win-alloca-expander.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/win-alloca-expander.ll
@@ -0,0 +1,153 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+%struct.S = type { [1024 x i8] }
+%struct.T = type { [3000 x i8] }
+%struct.U = type { [10000 x i8] }
+
+define void @basics() {
+; CHECK-LABEL: basics:
+entry:
+  br label %bb1
+
+; Allocation move sizes should have been removed.
+; CHECK-NOT: movl $1024
+; CHECK-NOT: movl $3000
+
+bb1:
+  %p0 = alloca %struct.S
+; The allocation is small enough not to require stack probing, but the %esp
+; offset after the prologue is not known, so the stack must be touched before
+; the pointer is adjusted.
+; CHECK: pushl %eax
+; CHECK: subl $1020, %esp
+
+  %saved_stack = tail call i8* @llvm.stacksave()
+
+  %p1 = alloca %struct.S
+; We know the %esp offset from above, so there is no need to touch the stack
+; before adjusting it.
+; CHECK: subl $1024, %esp
+
+  %p2 = alloca %struct.T
+; The offset is now 2048 bytes, so allocating a T must touch the stack again.
+; CHECK: pushl %eax
+; CHECK: subl $2996, %esp
+
+  call void @f(%struct.S* %p0)
+; CHECK: calll
+
+  %p3 = alloca %struct.T
+; The call above touched the stack, so there is room for a T object.
+; CHECK: subl $3000, %esp
+
+  %p4 = alloca %struct.U
+; The U object is large enough to require stack probing.
+; CHECK: movl $10000, %eax
+; CHECK: calll __chkstk
+
+  %p5 = alloca %struct.T
+; The stack probing above touched the tip of the stack, so there's room for a T.
+; CHECK: subl $3000, %esp
+
+  call void @llvm.stackrestore(i8* %saved_stack)
+  %p6 = alloca %struct.S
+; The stack restore means we lose track of the stack pointer: touch-and-sub.
+; CHECK: pushl %eax
+; CHECK: subl $1020, %esp
+
+; Use the pointers so they're not optimized away.
+  call void @f(%struct.S* %p1)
+  call void @g(%struct.T* %p2)
+  call void @g(%struct.T* %p3)
+  call void @h(%struct.U* %p4)
+  call void @g(%struct.T* %p5)
+  ret void
+}
+
+define void @loop() {
+; CHECK-LABEL: loop:
+entry:
+  br label %bb1
+
+bb1:
+  %p1 = alloca %struct.S
+; The entry offset is unknown; touch-and-sub.
+; CHECK: pushl %eax
+; CHECK: subl $1020, %esp
+  br label %loop1
+
+loop1:
+  %i1 = phi i32 [ 10, %bb1 ], [ %dec1, %loop1 ]
+  %p2 = alloca %struct.S
+; We know the incoming offset from bb1, but from the back-edge, we assume the
+; worst, and therefore touch-and-sub to allocate.
+; CHECK: pushl %eax
+; CHECK: subl $1020, %esp
+  %dec1 = sub i32 %i1, 1
+  %cmp1 = icmp sgt i32 %i1, 0
+  br i1 %cmp1, label %loop1, label %end
+; CHECK: decl
+; CHECK: jg
+
+end:
+  call void @f(%struct.S* %p1)
+  call void @f(%struct.S* %p2)
+  ret void
+}
+
+define void @probe_size_attribute() "stack-probe-size"="512" {
+; CHECK-LABEL: probe_size_attribute:
+entry:
+  br label %bb1
+
+bb1:
+  %p0 = alloca %struct.S
+; The allocation would be small enough not to require probing, if it wasn't
+; for the stack-probe-size attribute.
+; CHECK: movl $1024, %eax
+; CHECK: calll __chkstk
+  call void @f(%struct.S* %p0)
+  ret void
+}
+
+define void @cfg(i1 %x, i1 %y) {
+; Test that the blocks are analyzed in the correct order.
+; CHECK-LABEL: cfg:
+entry:
+  br i1 %x, label %bb1, label %bb2
+
+bb1:
+  %p1 = alloca %struct.S
+; CHECK: pushl %eax
+; CHECK: subl $1020, %esp
+  br label %bb3
+bb2:
+  %p2 = alloca %struct.T
+; CHECK: pushl %eax
+; CHECK: subl $2996, %esp
+  br label %bb3
+
+bb3:
+  br i1 %y, label %bb4, label %bb5
+
+bb4:
+  %p4 = alloca %struct.S
+; CHECK: subl $1024, %esp
+  call void @f(%struct.S* %p4)
+  ret void
+
+bb5:
+  %p5 = alloca %struct.T
+; CHECK: pushl %eax
+; CHECK: subl $2996, %esp
+  call void @g(%struct.T* %p5)
+  ret void
+}
+
+
+declare void @f(%struct.S*)
+declare void @g(%struct.T*)
+declare void @h(%struct.U*)
+
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)