Index: include/llvm/CodeGen/MachineFrameInfo.h
===================================================================
--- include/llvm/CodeGen/MachineFrameInfo.h
+++ include/llvm/CodeGen/MachineFrameInfo.h
@@ -278,6 +278,9 @@
   /// Not null, if shrink-wrapping found a better place for the epilogue.
   MachineBasicBlock *Restore = nullptr;
 
+  /// Whether the function has WIN_ALLOCA instructions.
+  bool HasWinAlloca = false;
+
 public:
   explicit MachineFrameInfo(unsigned StackAlignment, bool StackRealignable,
                             bool ForcedRealign)
@@ -656,6 +659,10 @@
   /// method always returns an empty set.
   BitVector getPristineRegs(const MachineFunction &MF) const;
 
+  /// Does the function have WIN_ALLOCA instructions?
+  bool hasWinAlloca() const { return HasWinAlloca; }
+  void setHasWinAlloca(bool v) { HasWinAlloca = v; }
+
   /// Used by the MachineFunction printer to print information about
   /// stack objects. Implemented in MachineFunction.cpp.
   void print(const MachineFunction &MF, raw_ostream &OS) const;
Index: lib/Target/X86/CMakeLists.txt
===================================================================
--- lib/Target/X86/CMakeLists.txt
+++ lib/Target/X86/CMakeLists.txt
@@ -37,6 +37,7 @@
   X86WinEHState.cpp
   X86OptimizeLEAs.cpp
   X86FixupBWInsts.cpp
+  X86WinAllocaExpander.cpp
   )
 
 add_llvm_target(X86CodeGen ${sources})
Index: lib/Target/X86/X86.h
===================================================================
--- lib/Target/X86/X86.h
+++ lib/Target/X86/X86.h
@@ -59,6 +59,9 @@
 /// recalculations.
 FunctionPass *createX86OptimizeLEAs();
 
+/// Return a pass that expands WinAlloca pseudo-instructions.
+FunctionPass *createX86WinAllocaExpander();
+
 /// Return a pass that optimizes the code-size of x86 call sequences. This is
 /// done by replacing esp-relative movs with pushes.
 FunctionPass *createX86CallFrameOptimization();
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -1162,9 +1162,6 @@
     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr *I,
                                            MachineBasicBlock *BB) const;
 
-    MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
-                                            MachineBasicBlock *BB) const;
-
     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr *MI,
                                            MachineBasicBlock *BB) const;
 
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -16601,14 +16601,13 @@
     Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
                          DAG.getRegister(Vreg, SPTy));
   } else {
-    SDValue Flag;
-    const unsigned Reg = (Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX);
-
-    Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
-    Flag = Chain.getValue(1);
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+    unsigned SizeReg = MRI.createVirtualRegister(getRegClassFor(SPTy));
+    Chain = DAG.getCopyToReg(Chain, dl, SizeReg, Size);
     SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-
-    Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
+    Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain,
+                        DAG.getRegister(SizeReg, SPTy));
+    MF.getFrameInfo()->setHasWinAlloca(true);
 
     const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
     unsigned SPReg = RegInfo->getStackRegister();
@@ -23133,18 +23132,6 @@
 }
 
 MachineBasicBlock *
-X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
-                                        MachineBasicBlock *BB) const {
-  assert(!Subtarget.isTargetMachO());
-  DebugLoc DL = MI->getDebugLoc();
-  MachineInstr *ResumeMI = Subtarget.getFrameLowering()->emitStackProbe(
-      *BB->getParent(), *BB, MI, DL, false);
-  MachineBasicBlock *ResumeBB = ResumeMI->getParent();
-  MI->eraseFromParent(); // The pseudo instruction is gone now.
-  return ResumeBB;
-}
-
-MachineBasicBlock *
 X86TargetLowering::EmitLoweredCatchRet(MachineInstr *MI,
                                        MachineBasicBlock *BB) const {
   MachineFunction *MF = BB->getParent();
@@ -23606,8 +23593,6 @@
   case X86::TLS_base_addr32:
   case X86::TLS_base_addr64:
     return EmitLoweredTLSAddr(MI, BB);
-  case X86::WIN_ALLOCA:
-    return EmitLoweredWinAlloca(MI, BB);
   case X86::CATCHRET:
     return EmitLoweredCatchRet(MI, BB);
   case X86::CATCHPAD:
Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -99,18 +99,6 @@
                               (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
                           (implicit EFLAGS)]>;
 
-// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
-// targets. These calls are needed to probe the stack when allocating more than
-// 4k bytes in one go. Touching the stack at 4K increments is necessary to
-// ensure that the guard pages used by the OS virtual memory manager are
-// allocated in correct sequence.
-// The main point of having separate instruction are extra unmodelled effects
-// (compared to ordinary calls) like stack pointer change.
-
-let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
-  def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
-                     "# dynamic stack allocation",
-                     [(X86WinAlloca)]>;
-
 
 // When using segmented stacks these are lowered into instructions which first
 // check if the current stacklet has enough free memory. If it does, memory is
@@ -132,6 +120,27 @@
                               Requires<[In64BitMode]>;
 }
 
+// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
+// targets. These calls are needed to probe the stack when allocating more than
+// 4k bytes in one go. Touching the stack at 4K increments is necessary to
+// ensure that the guard pages used by the OS virtual memory manager are
+// allocated in correct sequence.
+// The main point of having a separate instruction is the extra unmodelled
+// effects (compared to ordinary calls) like the stack pointer change.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+def WIN_ALLOCA_32 : I<0, Pseudo, (outs), (ins GR32:$size),
+                      "# dynamic stack allocation",
+                      [(X86WinAlloca GR32:$size)]>,
+                      Requires<[NotLP64]>;
+
+let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
+def WIN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size),
+                      "# dynamic stack allocation",
+                      [(X86WinAlloca GR64:$size)]>,
+                      Requires<[In64BitMode]>;
+
+
 //===----------------------------------------------------------------------===//
 // EH Pseudo Instructions
 //
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td
+++ lib/Target/X86/X86InstrInfo.td
@@ -112,6 +112,8 @@
 
 def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
+def SDT_X86WIN_ALLOCA : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+
 def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
 
 def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
@@ -273,8 +275,8 @@
 
 def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
 
-def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
-                          [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA,
+                          [SDNPHasChain, SDNPOutGlue]>;
 
 def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
                           [SDNPHasChain]>;
Index: lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- lib/Target/X86/X86TargetMachine.cpp
+++ lib/Target/X86/X86TargetMachine.cpp
@@ -266,6 +266,7 @@
     addPass(createX86OptimizeLEAs());
 
   addPass(createX86CallFrameOptimization());
+  addPass(createX86WinAllocaExpander());
 }
 
 void X86PassConfig::addPostRegAlloc() {
Index: lib/Target/X86/X86WinAllocaExpander.cpp
===================================================================
--- /dev/null
+++ lib/Target/X86/X86WinAllocaExpander.cpp
@@ -0,0 +1,274 @@
+//===----- X86WinAllocaExpander.cpp - Expand WinAlloca pseudo instruction -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pass that expands WinAlloca pseudo-instructions.
+//
+// It performs a conservative analysis to determine whether each allocation
+// falls within a region of the stack that is safe to use, or whether stack
+// probes must be emitted.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class X86WinAllocaExpander : public MachineFunctionPass {
+public:
+  X86WinAllocaExpander() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+  /// Strategies for lowering a WinAlloca.
+  enum Lowering { TouchAndSub, Sub, Probe };
+
+  /// Deterministic-order map from WinAlloca instruction to desired lowering.
+  typedef MapVector<MachineInstr *, Lowering> LoweringMap;
+
+  /// Compute which lowering to use for each WinAlloca instruction.
+  LoweringMap computeLowerings(MachineFunction &MF);
+
+  /// Get the appropriate lowering based on current offset and amount.
+  Lowering getLowering(int64_t CurrentOffset, int64_t AllocaAmount);
+
+  /// Lower a WinAlloca instruction.
+  void lower(MachineInstr *MI, Lowering L);
+
+  MachineRegisterInfo *MRI;
+  const X86Subtarget *STI;
+  const TargetInstrInfo *TII;
+  unsigned StackPtr;
+  unsigned StackProbeSize;
+
+  const char *getPassName() const override { return "X86 WinAlloca Expander"; }
+  static char ID;
+};
+
+char X86WinAllocaExpander::ID = 0;
+
+} // end anonymous namespace
+
+FunctionPass *llvm::createX86WinAllocaExpander() {
+  return new X86WinAllocaExpander();
+}
+
+/// Return the allocation amount for a WinAlloca instruction, or -1 if unknown.
+static int64_t getWinAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) {
+  assert(MI->getOpcode() == X86::WIN_ALLOCA_32 ||
+         MI->getOpcode() == X86::WIN_ALLOCA_64);
+  assert(MI->getOperand(0).isReg());
+
+  unsigned AmountReg = MI->getOperand(0).getReg();
+  MachineInstr *Def = MRI->getUniqueVRegDef(AmountReg);
+
+  // Look through copies.
+  while (Def && Def->isCopy() && Def->getOperand(1).isReg())
+    Def = MRI->getUniqueVRegDef(Def->getOperand(1).getReg());
+
+  if (!Def ||
+      (Def->getOpcode() != X86::MOV32ri && Def->getOpcode() != X86::MOV64ri) ||
+      !Def->getOperand(1).isImm())
+    return -1;
+
+  return Def->getOperand(1).getImm();
+}
+
+X86WinAllocaExpander::Lowering
+X86WinAllocaExpander::getLowering(int64_t CurrentOffset,
+                                  int64_t AllocaAmount) {
+  // For a non-constant amount or a large amount, we have to probe.
+  if (AllocaAmount == -1 || AllocaAmount > StackProbeSize)
+    return Probe;
+
+  // If it fits within the safe region of the stack, just subtract.
+  if (CurrentOffset + AllocaAmount <= StackProbeSize)
+    return Sub;
+
+  // Otherwise, touch the current tip of the stack, then subtract.
+  assert(AllocaAmount <= StackProbeSize);
+  return TouchAndSub;
+}
+
+X86WinAllocaExpander::LoweringMap
+X86WinAllocaExpander::computeLowerings(MachineFunction &MF) {
+  // Do a one-pass reverse post-order walk of the CFG to conservatively estimate
+  // the offset between the stack pointer and the lowest touched part of the
+  // stack, and use that to decide how to lower each WinAlloca instruction.
+
+  // Compute the reverse post-order.
+  SmallVector<MachineBasicBlock *, 16> RPO;
+  RPO.reserve(MF.size());
+  std::copy(po_begin(&MF), po_end(&MF), std::back_inserter(RPO));
+  std::reverse(RPO.begin(), RPO.end());
+
+  // Initialize Out[B], the stack offset at exit from B, to something big.
+  DenseMap<MachineBasicBlock *, int64_t> Out;
+  for (MachineBasicBlock *MBB : RPO)
+    Out[MBB] = INT32_MAX;
+
+  // Note: we don't know the offset at the start of the entry block since the
+  // prologue hasn't been inserted yet, and how much that will adjust the stack
+  // pointer depends on register spills, which have not been computed yet.
+
+  LoweringMap Lowerings;
+  for (MachineBasicBlock *MBB : RPO) {
+    int64_t Offset = -1;
+    for (MachineBasicBlock *Pred : MBB->predecessors())
+      Offset = std::max(Offset, Out[Pred]);
+    if (Offset == -1) Offset = INT32_MAX;
+
+    for (MachineInstr &MI : *MBB) {
+      // A call touches the tip of the stack.
+      if (MI.isCall())
+        Offset = 0;
+
+      // A WinAlloca moves StackPtr, and potentially touches it.
+      if (MI.getOpcode() == X86::WIN_ALLOCA_32 ||
+          MI.getOpcode() == X86::WIN_ALLOCA_64) {
+        int64_t Amount = getWinAllocaAmount(&MI, MRI);
+        Lowering L = getLowering(Offset, Amount);
+        Lowerings[&MI] = L;
+        switch (L) {
+        case Sub:
+          Offset += Amount;
+          break;
+        case TouchAndSub:
+          Offset = Amount;
+          break;
+        case Probe:
+          Offset = 0;
+          break;
+        }
+      }
+
+      // A stackrestore makes the offset unknown.
+      if (MI.isCopy() && MI.getOperand(0).isReg() &&
+          MI.getOperand(0).getReg() == StackPtr)
+        Offset = -1;
+    }
+
+    Out[MBB] = Offset;
+  }
+
+  return Lowerings;
+}
+
+static unsigned getSubOpcode(bool Is64Bit, int64_t Amount) {
+  if (Is64Bit)
+    return isInt<8>(Amount) ? X86::SUB64ri8 : X86::SUB64ri32;
+  return isInt<8>(Amount) ? X86::SUB32ri8 : X86::SUB32ri;
+}
+
+void X86WinAllocaExpander::lower(MachineInstr *MI, Lowering L) {
+  DebugLoc DL = MI->getDebugLoc();
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineBasicBlock::iterator I = *MI;
+
+  int64_t Amount = getWinAllocaAmount(MI, MRI);
+  if (Amount == 0) {
+    MI->eraseFromParent();
+    return;
+  }
+
+  bool Is64Bit = STI->is64Bit();
+
+  if (Amount != -1) {
+    assert((Amount % (Is64Bit ? 8 : 4) == 0) && "Stack would not be aligned!");
+  }
+
+  switch (L) {
+  case TouchAndSub:
+    assert(Amount >= (Is64Bit ? 8 : 4));
+
+    // Use a push to touch the top of the stack.
+    BuildMI(*MBB, I, DL, TII->get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+        .addReg(Is64Bit ? X86::RAX : X86::EAX, RegState::Undef);
+    Amount -= (Is64Bit ? 8 : 4);
+    if (!Amount)
+      break;
+
+    // Fall through to make any remaining adjustment.
+  case Sub:
+    assert(Amount > 0);
+    if (Amount == (Is64Bit ? 8 : 4)) {
+      // Use a push to save code size.
+      BuildMI(*MBB, I, DL, TII->get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+          .addReg(Is64Bit ? X86::RAX : X86::EAX, RegState::Undef);
+    } else {
+      // Sub.
+      BuildMI(*MBB, I, DL, TII->get(getSubOpcode(Is64Bit, Amount)), StackPtr)
+          .addReg(StackPtr)
+          .addImm(Amount);
+    }
+    break;
+  case Probe:
+    // The probe lowering expects the amount in RAX/EAX.
+    BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY),
+            Is64Bit ? X86::RAX : X86::EAX)
+        .addReg(MI->getOperand(0).getReg());
+
+    // Do the probe.
+    STI->getFrameLowering()->emitStackProbe(*MBB->getParent(), *MBB, MI, DL,
+                                            /*InPrologue=*/false);
+    break;
+  }
+  unsigned AmountReg = MI->getOperand(0).getReg();
+  MI->eraseFromParent();
+
+  // Delete the definition of AmountReg, possibly walking a chain of copies.
+  for (;;) {
+    if (!MRI->use_empty(AmountReg))
+      break;
+    MachineInstr *AmountDef = MRI->getUniqueVRegDef(AmountReg);
+    if (!AmountDef)
+      break;
+    if (AmountDef->isCopy() && AmountDef->getOperand(1).isReg())
+      AmountReg = AmountDef->getOperand(1).getReg();
+    AmountDef->eraseFromParent();
+  }
+}
+
+bool X86WinAllocaExpander::runOnMachineFunction(MachineFunction &MF) {
+  if (!MF.getFrameInfo()->hasWinAlloca())
+    return false;
+
+  MRI = &MF.getRegInfo();
+  STI = &MF.getSubtarget<X86Subtarget>();
+  TII = STI->getInstrInfo();
+  StackPtr = STI->is64Bit() ? X86::RSP : X86::ESP;
+
+  StackProbeSize = 4096;
+  if (MF.getFunction()->hasFnAttribute("stack-probe-size")) {
+    MF.getFunction()
+        ->getFnAttribute("stack-probe-size")
+        .getValueAsString()
+        .getAsInteger(0, StackProbeSize);
+  }
+
+  LoweringMap Lowerings = computeLowerings(MF);
+  for (auto &P : Lowerings)
+    lower(P.first, P.second);
+
+  return true;
+}
Index: test/CodeGen/X86/cleanuppad-inalloca.ll
===================================================================
--- test/CodeGen/X86/cleanuppad-inalloca.ll
+++ test/CodeGen/X86/cleanuppad-inalloca.ll
@@ -38,8 +38,8 @@
 ; CHECK: pushl %ebp
 ; CHECK: movl %esp, %ebp
 ; CHECK: subl ${{[0-9]+}}, %esp
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
 ; CHECK: calll "??0A@@QAE@XZ"
 ; CHECK: calll "??0A@@QAE@XZ"
 ; CHECK: calll _takes_two
Index: test/CodeGen/X86/dynamic-alloca-in-entry.ll
===================================================================
--- test/CodeGen/X86/dynamic-alloca-in-entry.ll
+++ test/CodeGen/X86/dynamic-alloca-in-entry.ll
@@ -15,5 +15,5 @@
   ret void
 }
 ; CHECK-LABEL: _bar:
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
 ; CHECK: retl
Index: test/CodeGen/X86/inalloca-ctor.ll
===================================================================
--- test/CodeGen/X86/inalloca-ctor.ll
+++ test/CodeGen/X86/inalloca-ctor.ll
@@ -12,8 +12,8 @@
 entry:
   %args = alloca inalloca %frame
   %c = getelementptr %frame, %frame* %args, i32 0, i32 2
-; CHECK: movl $20, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: subl $16, %esp
 ; CHECK: movl %esp,
   call void @Foo_ctor(%Foo* %c)
 ; CHECK: leal 12(%{{.*}}),
Index: test/CodeGen/X86/inalloca-invoke.ll
===================================================================
--- test/CodeGen/X86/inalloca-invoke.ll
+++ test/CodeGen/X86/inalloca-invoke.ll
@@ -21,7 +21,8 @@
   %beg = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 0
   %end = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 1
 
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: subl $20, %esp
 ; CHECK: movl %esp, %[[beg:[^ ]*]]
 ; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]]
 
Index: test/CodeGen/X86/inalloca-stdcall.ll
===================================================================
--- test/CodeGen/X86/inalloca-stdcall.ll
+++ test/CodeGen/X86/inalloca-stdcall.ll
@@ -8,8 +8,8 @@
 define void @g() {
 ; CHECK-LABEL: _g:
   %b = alloca inalloca %Foo
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
   %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
   %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
   store i32 13, i32* %f1
Index: test/CodeGen/X86/inalloca.ll
===================================================================
--- test/CodeGen/X86/inalloca.ll
+++ test/CodeGen/X86/inalloca.ll
@@ -8,8 +8,8 @@
 ; CHECK-LABEL: _a:
 entry:
   %b = alloca inalloca %Foo
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
   %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
   %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
   store i32 13, i32* %f1
@@ -28,8 +28,8 @@
 ; CHECK-LABEL: _b:
 entry:
   %b = alloca inalloca %Foo
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
   %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
   %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
   store i32 13, i32* %f1
@@ -49,8 +49,8 @@
 ; CHECK-LABEL: _c:
 entry:
   %b = alloca inalloca %Foo
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
   %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
   %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
   store i32 13, i32* %f1
Index: test/CodeGen/X86/shrink-wrap-chkstk.ll
===================================================================
--- test/CodeGen/X86/shrink-wrap-chkstk.ll
+++ test/CodeGen/X86/shrink-wrap-chkstk.ll
@@ -7,7 +7,7 @@
 target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "i686-pc-windows-msvc18.0.0"
 
-%struct.S = type { [12 x i8] }
+%struct.S = type { [8192 x i8] }
 
 define x86_thiscallcc void @call_inalloca(i1 %x) {
 entry:
@@ -29,7 +29,7 @@
 ; CHECK-LABEL: _call_inalloca: # @call_inalloca
 ; CHECK: pushl %ebp
 ; CHECK: movl %esp, %ebp
-; CHECK: movl $12, %eax
+; CHECK: movl $8192, %eax
 ; CHECK: calll __chkstk
 ; CHECK: calll _inalloca_params
 ; CHECK: movl %ebp, %esp
Index: test/CodeGen/X86/win-alloca-expander.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/win-alloca-expander.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+%struct.S = type { [1024 x i8] }
+%struct.T = type { [3000 x i8] }
+%struct.U = type { [10000 x i8] }
+
+define void @basics() {
+; CHECK-LABEL: basics:
+entry:
+  br label %bb1
+
+bb1:
+  %p0 = alloca %struct.S
+; The allocation is small enough not to require stack probing, but the %esp
+; offset after the prologue is not known, so the stack must be touched before
+; the pointer is adjusted.
+; CHECK: pushl %eax
+; CHECK: subl $1020, %esp
+
+  %p1 = alloca %struct.S
+; We know the %esp offset from above, so there is no need to touch the stack
+; before adjusting it.
+; CHECK: subl $1024, %esp
+
+  %p2 = alloca %struct.T
+; The offset is now 2048 bytes, so allocating a T must touch the stack again.
+; CHECK: pushl %eax
+; CHECK: subl $2996, %esp
+
+  call void @f(%struct.S* %p0)
+; CHECK: calll
+
+  %p3 = alloca %struct.T
+; The call above touched the stack, so there is room for a T object.
+; CHECK: subl $3000, %esp
+
+  %p4 = alloca %struct.U
+; The U object is large enough to require stack probing.
+; CHECK: movl $10000, %eax
+; CHECK: calll __chkstk
+
+  %p5 = alloca %struct.T
+; The stack probing above touched the tip of the stack, so there's room for a T.
+; CHECK: subl $3000, %esp
+
+; Use the pointers so they're not optimized away.
+  call void @f(%struct.S* %p1)
+  call void @g(%struct.T* %p2)
+  call void @g(%struct.T* %p3)
+  call void @h(%struct.U* %p4)
+  call void @g(%struct.T* %p5)
+  ret void
+}
+
+define void @loop() {
+; CHECK-LABEL: loop:
+entry:
+  br label %bb1
+
+bb1:
+  %p1 = alloca %struct.S
+; The entry offset is unknown; touch-and-sub.
+; CHECK: pushl %eax
+; CHECK: subl $1020, %esp
+  br label %loop1
+
+loop1:
+  %i1 = phi i32 [ 10, %bb1 ], [ %dec1, %loop1 ]
+  %p2 = alloca %struct.S
+; We know the incoming offset from bb1, but from the back-edge, we assume the
+; worst, and therefore touch-and-sub to allocate.
+; CHECK: pushl %eax
+; CHECK: subl $1020, %esp
+  %dec1 = sub i32 %i1, 1
+  %cmp1 = icmp sgt i32 %i1, 0
+  br i1 %cmp1, label %loop1, label %end
+; CHECK: decl
+; CHECK: jg
+
+end:
+  call void @f(%struct.S* %p1)
+  call void @f(%struct.S* %p2)
+  ret void
+}
+
+
+declare void @f(%struct.S*)
+declare void @g(%struct.T*)
+declare void @h(%struct.U*)
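
The decision rule the new pass applies is simple enough to replay outside of LLVM. The following standalone C++ sketch is not part of the patch; the function name, the -1 "unknown amount" convention, and the 4096-byte default are copied from X86WinAllocaExpander::getLowering above, and the sample inputs follow the basics() function in win-alloca-expander.ll:

#include <cstdint>
#include <cstdio>

enum Lowering { TouchAndSub, Sub, Probe };

// Same rule as X86WinAllocaExpander::getLowering. Offset is a conservative
// bound on how far the stack pointer already is below the lowest touched byte;
// Amount is the allocation size, or -1 if it is not a known constant.
static Lowering getLowering(int64_t Offset, int64_t Amount,
                            int64_t StackProbeSize = 4096) {
  if (Amount == -1 || Amount > StackProbeSize)
    return Probe;       // unknown or larger than one page: emit a real probe
  if (Offset + Amount <= StackProbeSize)
    return Sub;         // still within the already-touched page: plain sub
  return TouchAndSub;   // small amount, but the offset is too large/unknown
}

int main() {
  // Mirrors the expectations in test/CodeGen/X86/win-alloca-expander.ll:
  std::printf("%d\n", getLowering(INT32_MAX, 1024)); // TouchAndSub: push + sub
  std::printf("%d\n", getLowering(1024, 1024));      // Sub: 2048 <= 4096
  std::printf("%d\n", getLowering(2048, 3000));      // TouchAndSub: crosses page
  std::printf("%d\n", getLowering(0, 3000));         // Sub: a call touched the tip
  std::printf("%d\n", getLowering(0, 10000));        // Probe: needs __chkstk
}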
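As background for the 4K figure that appears in the .td comment and in the StackProbeSize default: Windows commits stack memory through a single guard page just below the committed region, so an access that skips past the guard page faults instead of growing the stack, which is why each new page has to be touched in order. The sketch below is illustrative only and is not how __chkstk is implemented (the real routine is hand-written assembly and also adjusts the stack pointer); probePages and its parameters are invented for this illustration:

#include <cstdint>

// Touch one byte in every page of the region being allocated, top-down, so the
// OS guard page is hit one page at a time and the region is committed before
// the new stack pointer is used.
void probePages(volatile char *OldSP, uint64_t Amount,
                uint64_t PageSize = 4096) {
  for (uint64_t Off = PageSize; Off < Amount; Off += PageSize)
    OldSP[-static_cast<int64_t>(Off)] = 0;
  OldSP[-static_cast<int64_t>(Amount)] = 0; // lowest byte of the new region
}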