Index: include/llvm/CodeGen/MachineFunction.h
===================================================================
--- include/llvm/CodeGen/MachineFunction.h
+++ include/llvm/CodeGen/MachineFunction.h
@@ -278,6 +278,13 @@
   /// Should we be emitting segmented stack stuff for the function
   bool shouldSplitStack();
 
+  /// Should we be probing the stack for the function.
+  /// Probing the stack means that we must read or write to the stack on every
+  /// page. This is to ensure that a guard page will be hit and stack overflow
+  /// can be detected. We insert instructions to do this when allocating from
+  /// the stack.
+  bool shouldProbeStack() const;
+
   /// getNumBlockIDs - Return the number of MBB ID's allocated.
   ///
   unsigned getNumBlockIDs() const { return (unsigned)MBBNumbering.size(); }
Index: lib/CodeGen/MachineFunction.cpp
===================================================================
--- lib/CodeGen/MachineFunction.cpp
+++ lib/CodeGen/MachineFunction.cpp
@@ -130,6 +130,15 @@
   return getFunction()->hasFnAttribute("split-stack");
 }
 
+/// Should we be probing the stack for the function.
+/// Probing the stack means that we must read or write to the stack on every
+/// page. This is to ensure that a guard page will be hit and stack overflow
+/// can be detected. We insert instructions to do this when allocating from
+/// the stack.
+bool MachineFunction::shouldProbeStack() const {
+  return getFunction()->hasFnAttribute("probe-stack");
+}
+
 /// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
 /// recomputes them. This guarantees that the MBB numbers are sequential,
 /// dense, and match the ordering of the blocks within the function. If a
Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -409,17 +409,20 @@
     CallOp = X86::CALLpcrel32;
 
   const char *Symbol;
-  if (Is64Bit) {
-    if (STI.isTargetCygMing()) {
-      Symbol = "___chkstk_ms";
-    } else {
-      Symbol = "__chkstk";
-    }
-  } else if (STI.isTargetCygMing())
-    Symbol = "_alloca";
-  else
-    Symbol = "_chkstk";
-
+  if (STI.isOSWindows()) {
+    if (Is64Bit) {
+      if (STI.isTargetCygMing()) {
+        Symbol = "___chkstk_ms";
+      } else {
+        Symbol = "__chkstk";
+      }
+    } else if (STI.isTargetCygMing())
+      Symbol = "_alloca";
+    else
+      Symbol = "_chkstk";
+  } else {
+    Symbol = "__probestack";
+  }
   MachineInstrBuilder CI;
 
   // All current stack probes take AX and SP as input, clobber flags, and
@@ -442,13 +445,13 @@
       .addReg(SP, RegState::Define | RegState::Implicit)
       .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
 
-  if (Is64Bit) {
+  if (!STI.isTargetWin32()) {
     // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
     // themselves. It also does not clobber %rax so we can reuse it when
     // adjusting %rsp.
-    BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
-        .addReg(X86::RSP)
-        .addReg(X86::RAX);
+    BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Is64Bit)), SP)
+        .addReg(SP)
+        .addReg(AX);
   }
 }
@@ -608,7 +611,9 @@
     X86FI->setCalleeSavedFrameSize(
       X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
 
-  bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO());
+  bool UseRedZone = false;
+  bool UseStackProbe =
+      (STI.isOSWindows() && !STI.isTargetMachO()) || MF.shouldProbeStack();
 
   // The default stack probe size is 4096 if the function has no stackprobesize
   // attribute.
@@ -628,12 +633,19 @@
       !MFI->hasVarSizedObjects() && // No dynamic alloca.
       !MFI->adjustsStack() &&       // No calls.
       !IsWin64 &&                   // Win64 has no Red Zone
+
+      !(UseStackProbe && StackSize > 128) && // Only use the Red Zone if we can
+                                             // fit the whole stack in it
+                                             // and thus stack probes won't be
+                                             // needed
+
       !usesTheStack(MF) &&          // Don't push and pop.
       !MF.shouldSplitStack()) {     // Regular stack
     uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
     if (HasFP) MinSize += SlotSize;
     StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
     MFI->setStackSize(StackSize);
+    UseRedZone = true;
   }
 
   // Insert stack pointer adjustment for later moving of return addr. Only
@@ -804,42 +816,42 @@
   if (IsWinEH && RegInfo->needsStackRealignment(MF))
     AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign);
   if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
-    // Check whether EAX is livein for this function.
-    bool isEAXAlive = isEAXLiveIn(MF);
-
-    if (isEAXAlive) {
-      // Sanity check that EAX is not livein for this function.
-      // It should not be, so throw an assert.
-      assert(!Is64Bit && "EAX is livein in x64 case!");
-
-      // Save EAX
-      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
-        .addReg(X86::EAX, RegState::Kill)
-        .setMIFlag(MachineInstr::FrameSetup);
+    assert(!UseRedZone && "The Red Zone is not accounted for in stack probes");
+    // Check whether the accumulator register is livein for this function.
+    bool IsRegAccAlive = isEAXLiveIn(MF);
+    auto RegAcc = Is64Bit ? X86::RAX : X86::EAX;
+
+    if (IsRegAccAlive) {
+      // Save RegAcc
+      BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+        .addReg(RegAcc, RegState::Kill)
+        .setMIFlag(MachineInstr::FrameSetup);
     }
 
+    uint64_t NumBytesAdj = IsRegAccAlive ? NumBytes - SlotSize : NumBytes;
+
     if (Is64Bit) {
       // Handle the 64-bit Windows ABI case where we need to call __chkstk.
       // Function prologue is responsible for adjusting the stack pointer.
-      if (isUInt<32>(NumBytes)) {
+      if (isUInt<32>(NumBytesAdj)) {
         BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
-          .addImm(NumBytes)
+          .addImm(NumBytesAdj)
           .setMIFlag(MachineInstr::FrameSetup);
-      } else if (isInt<32>(NumBytes)) {
+      } else if (isInt<32>(NumBytesAdj)) {
         BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX)
-          .addImm(NumBytes)
+          .addImm(NumBytesAdj)
           .setMIFlag(MachineInstr::FrameSetup);
       } else {
         BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
-          .addImm(NumBytes)
+          .addImm(NumBytesAdj)
          .setMIFlag(MachineInstr::FrameSetup);
       }
     } else {
       // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
       // We'll also use 4 already allocated bytes for EAX.
       BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
-        .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
-        .setMIFlag(MachineInstr::FrameSetup);
+        .addImm(NumBytesAdj)
+        .setMIFlag(MachineInstr::FrameSetup);
     }
 
     // Save a pointer to the MI where we set AX.
@@ -853,11 +865,12 @@
     for (; SetRAX != MBBI; ++SetRAX)
       SetRAX->setFlag(MachineInstr::FrameSetup);
 
-    if (isEAXAlive) {
-      // Restore EAX
-      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
-                                              X86::EAX),
-                                      StackPtr, false, NumBytes - 4);
+    if (IsRegAccAlive) {
+      // Restore RegAcc
+      auto MIB = BuildMI(MF, DL,
+                         TII.get(Is64Bit ? X86::MOV64rm : X86::MOV32rm),
+                         RegAcc);
+      MachineInstr *MI = addRegOffset(MIB, StackPtr, false, NumBytesAdj);
       MI->setFlag(MachineInstr::FrameSetup);
       MBB.insert(MBBI, MI);
     }
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -14403,7 +14403,7 @@
   MachineFunction &MF = DAG.getMachineFunction();
   bool SplitStack = MF.shouldSplitStack();
   bool Lower = (Subtarget->isOSWindows() && !Subtarget->isTargetMachO()) ||
-               SplitStack;
+               SplitStack || MF.shouldProbeStack();
   SDLoc dl(Op);
 
   if (!Lower) {
Index: test/CodeGen/X86/stack-probes.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/stack-probes.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck --check-prefix=X86-LINUX %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=X64-LINUX %s
+
+declare void @use([40096 x i8]*)
+
+; Ensure calls to __probestack occur for large stack frames
+define void @test() "probe-stack" {
+  %array = alloca [40096 x i8], align 16
+  call void @use([40096 x i8]* %array)
+  ret void
+
+; X86-LINUX-LABEL: test:
+; X86-LINUX: movl $40124, %eax # imm = 0x9CBC
+; X86-LINUX-NEXT: calll __probestack
+; X86-LINUX-NEXT: subl %eax, %esp
+
+; X64-LINUX-LABEL: test:
+; X64-LINUX: movl $40104, %eax # imm = 0x9CA8
+; X64-LINUX-NEXT: callq __probestack
+; X64-LINUX-NEXT: subq %rax, %rsp
+
+}
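
Usage note (illustrative, not part of the patch): the pieces the patch adds are the "probe-stack" string attribute read by MachineFunction::shouldProbeStack() and, on non-Windows targets, the call to the __probestack runtime symbol for frames at least StackProbeSize bytes large. Below is a minimal sketch of how a frontend built on the LLVM C++ API might opt a function into probing; the helper makeProbedFunction and the function name probed_fn are hypothetical and used only for illustration.

  // Sketch: attach the "probe-stack" attribute so that large stack frames in
  // this function go through the stack-probe path added above.
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"

  using namespace llvm;

  static Function *makeProbedFunction(Module &M) {
    LLVMContext &Ctx = M.getContext();
    // Create: void probed_fn()
    FunctionType *FTy =
        FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
    Function *F =
        Function::Create(FTy, Function::ExternalLinkage, "probed_fn", &M);
    // "probe-stack" is the attribute MachineFunction::shouldProbeStack() checks;
    // with it set, the X86 prologue emits __probestack (or the platform chkstk
    // variant on Windows) before adjusting the stack pointer for large frames.
    F->addFnAttr("probe-stack");
    return F;
  }

The equivalent in LLVM IR is simply define void @probed_fn() "probe-stack" { ... }, as exercised by the new test/CodeGen/X86/stack-probes.ll test above.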