Index: lib/Target/X86/X86FrameLowering.cpp =================================================================== --- lib/Target/X86/X86FrameLowering.cpp +++ lib/Target/X86/X86FrameLowering.cpp @@ -420,6 +420,10 @@ else Symbol = "_chkstk"; + if (!STI.isOSWindows()) { + Symbol = "__probestack"; + } + MachineInstrBuilder CI; // All current stack probes take AX and SP as input, clobber flags, and @@ -442,13 +446,14 @@ .addReg(SP, RegState::Define | RegState::Implicit) .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - if (Is64Bit) { + if (!STI.isTargetWin32()) { // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp // themselves. It also does not clobber %rax so we can reuse it when // adjusting %rsp. - BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP) - .addReg(X86::RSP) - .addReg(X86::RAX); + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::SUB64rr : X86::SUB32rr), + SP) + .addReg(SP) + .addReg(AX); } } @@ -608,7 +613,9 @@ X86FI->setCalleeSavedFrameSize( X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); - bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO()); + bool UseRedZone = false; + bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO()) || + MF.shouldProbeStack(); // The default stack probe size is 4096 if the function has no stackprobesize // attribute. @@ -628,12 +635,19 @@ !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. !IsWin64 && // Win64 has no Red Zone + + !(UseStackProbe && StackSize > 128) && // Only use the Red Zone if we can + // fit the whole stack in it + // and thus stack probes won't be + // needed + !usesTheStack(MF) && // Don't push and pop. !MF.shouldSplitStack()) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); MFI->setStackSize(StackSize); + UseRedZone = true; } // Insert stack pointer adjustment for later moving of return addr. Only @@ -804,41 +818,42 @@ if (IsWinEH && RegInfo->needsStackRealignment(MF)) AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign); if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { - // Check whether EAX is livein for this function. - bool isEAXAlive = isEAXLiveIn(MF); - - if (isEAXAlive) { - // Sanity check that EAX is not livein for this function. - // It should not be, so throw an assert. - assert(!Is64Bit && "EAX is livein in x64 case!"); - - // Save EAX - BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) - .addReg(X86::EAX, RegState::Kill) + assert(!UseRedZone && "The Red Zone is not accounted for in stack probes"); + // Check whether the accumulator register is livein for this function. + bool isRegAccAlive = isEAXLiveIn(MF); + auto RegAcc = Is64Bit ? X86::RAX : X86::EAX; + + if (isRegAccAlive) { + // Save RegAcc + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) + .addReg(RegAcc, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); } + uint64_t NumBytesAdj = isRegAccAlive ? NumBytes - (Is64Bit ? 8 : 4) : + NumBytes; + if (Is64Bit) { // Handle the 64-bit Windows ABI case where we need to call __chkstk. // Function prologue is responsible for adjusting the stack pointer. - if (isUInt<32>(NumBytes)) { + if (isUInt<32>(NumBytesAdj)) { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes) + .addImm(NumBytesAdj) .setMIFlag(MachineInstr::FrameSetup); - } else if (isInt<32>(NumBytes)) { + } else if (isInt<32>(NumBytesAdj)) { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX) - .addImm(NumBytes) + .addImm(NumBytesAdj) .setMIFlag(MachineInstr::FrameSetup); } else { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) - .addImm(NumBytes) + .addImm(NumBytesAdj) .setMIFlag(MachineInstr::FrameSetup); } } else { // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. // We'll also use 4 already allocated bytes for EAX. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) + .addImm(NumBytesAdj) .setMIFlag(MachineInstr::FrameSetup); } @@ -853,11 +868,12 @@ for (; SetRAX != MBBI; ++SetRAX) SetRAX->setFlag(MachineInstr::FrameSetup); - if (isEAXAlive) { - // Restore EAX - MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), - X86::EAX), - StackPtr, false, NumBytes - 4); + if (isRegAccAlive) { + // Restore RegAcc + auto MIB = BuildMI(MF, DL, + TII.get(Is64Bit ? X86::MOV64rm : X86::MOV32rm), + RegAcc); + MachineInstr *MI = addRegOffset(MIB, StackPtr, false, NumBytesAdj); MI->setFlag(MachineInstr::FrameSetup); MBB.insert(MBBI, MI); } Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -14403,7 +14403,7 @@ MachineFunction &MF = DAG.getMachineFunction(); bool SplitStack = MF.shouldSplitStack(); bool Lower = (Subtarget->isOSWindows() && !Subtarget->isTargetMachO()) || - SplitStack; + SplitStack || MF.shouldProbeStack(); SDLoc dl(Op); if (!Lower) { Index: test/CodeGen/X86/stack-probes.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/stack-probes.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck --check-prefix=X86-Linux %s +; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=X64-Linux %s + +declare void @use([40096 x i8]*) + +; Ensure calls to __probestack occur for large stack frames +define void @test() "probe-stack" { + %array = alloca [40096 x i8], align 16 + call void @use([40096 x i8]* %array) + ret void + +; X86-Linux-LABEL: test: +; X86-Linux: movl $40124, %eax # imm = 0x9CBC +; X86-Linux-NEXT: calll __probestack +; X86-Linux-NEXT: subl %eax, %esp + +; X64-Linux-LABEL: test: +; X64-Linux: movl $40104, %eax # imm = 0x9CA8 +; X64-Linux-NEXT: callq __probestack +; X64-Linux-NEXT: subq %rax, %rsp + +}