Index: include/llvm/CodeGen/MachineFunction.h =================================================================== --- include/llvm/CodeGen/MachineFunction.h +++ include/llvm/CodeGen/MachineFunction.h @@ -287,6 +287,14 @@ /// Should we be emitting segmented stack stuff for the function bool shouldSplitStack(); + /// \brief Should we be probing the stack for the function. + /// + /// Probing the stack means that we must read or write to the stack on every + /// page. This is to ensure that a guard page will be hit and stack overflow + /// can be detected. We insert instructions to do this when allocating from + /// the stack. + bool shouldProbeStack() const; + /// getNumBlockIDs - Return the number of MBB ID's allocated. /// unsigned getNumBlockIDs() const { return (unsigned)MBBNumbering.size(); } Index: lib/CodeGen/MachineFunction.cpp =================================================================== --- lib/CodeGen/MachineFunction.cpp +++ lib/CodeGen/MachineFunction.cpp @@ -147,6 +147,10 @@ return getFunction()->hasFnAttribute("split-stack"); } +bool MachineFunction::shouldProbeStack() const { + return getFunction()->hasFnAttribute("probe-stack"); +} + /// This discards all of the MachineBasicBlock numbers and recomputes them. /// This guarantees that the MBB numbers are sequential, dense, and match the /// ordering of the blocks within the function. 
If a specific MachineBasicBlock Index: lib/Target/X86/X86FrameLowering.h =================================================================== --- lib/Target/X86/X86FrameLowering.h +++ lib/Target/X86/X86FrameLowering.h @@ -47,6 +47,20 @@ unsigned StackPtr; + void pushRegForStackProbeCall(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, + bool &IsAlive, + unsigned RegType, + uint64_t &NumBytes) const; + void popRegForStackProbeCall(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, + bool &IsAlive, + unsigned RegType, + uint64_t &NumBytes) const; /// Emit a call to the target's stack probe function. This is required for all /// large stack allocations on Windows. The caller is required to materialize /// the number of bytes to probe in RAX/EAX. Index: lib/Target/X86/X86FrameLowering.cpp =================================================================== --- lib/Target/X86/X86FrameLowering.cpp +++ lib/Target/X86/X86FrameLowering.cpp @@ -197,13 +197,14 @@ return 0; } -static bool isEAXLiveIn(MachineFunction &MF) { +static bool isLiveIn(MachineFunction &MF, unsigned CheckReg) { + CheckReg = getX86SubSuperRegister(CheckReg, MVT::i32); + for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), EE = MF.getRegInfo().livein_end(); II != EE; ++II) { unsigned Reg = II->first; - if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX || - Reg == X86::AH || Reg == X86::AL) + if (getX86SubSuperRegisterOrZero(Reg, MVT::i32) == CheckReg) return true; } @@ -250,7 +251,7 @@ // load the offset into a register and do one sub/add unsigned Reg = 0; - if (isSub && !isEAXLiveIn(*MBB.getParent())) + if (isSub && !isLiveIn(*MBB.getParent(), X86::EAX)) Reg = (unsigned)(Is64Bit ? 
X86::RAX : X86::EAX); else Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); @@ -425,6 +426,55 @@ return false; } +void X86FrameLowering::pushRegForStackProbeCall(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, + bool &IsAlive, + unsigned RegType, + uint64_t &NumBytes) const { + IsAlive = isLiveIn(MF, RegType); + + if (!IsAlive) { + return; + } + + auto Reg = getX86SubSuperRegister(RegType, Is64Bit ? MVT::i64 : MVT::i32); + + // Save the register on the stack. + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) + .addReg(Reg, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); + + // Reuse the space from the spill as a stack allocation. + NumBytes -= SlotSize; +} + +void X86FrameLowering::popRegForStackProbeCall(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, + bool &IsAlive, + unsigned RegType, + uint64_t &NumBytes) const { + if (!IsAlive) { + return; + } + + // Restore the register from the stack slot. + + auto Reg = getX86SubSuperRegister(RegType, Is64Bit ? MVT::i64 : MVT::i32); + + auto MIB = BuildMI(MF, DL, + TII.get(Is64Bit ? 
X86::MOV64rm : X86::MOV32rm), + Reg); + MachineInstr *MI = addRegOffset(MIB, StackPtr, false, NumBytes); + MI->setFlag(MachineInstr::FrameSetup); + MBB.insert(MBBI, MI); + + NumBytes += SlotSize; +} + void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -438,17 +488,21 @@ CallOp = X86::CALLpcrel32; const char *Symbol; - if (Is64Bit) { - if (STI.isTargetCygMing()) { - Symbol = "___chkstk_ms"; + if (STI.isOSWindows()) { + if (Is64Bit) { + if (STI.isTargetCygMing()) { + Symbol = "___chkstk_ms"; + } else { + Symbol = "__chkstk"; + } + } else if (STI.isTargetCygMing()) { + Symbol = "_alloca"; } else { - Symbol = "__chkstk"; + Symbol = "_chkstk"; } - } else if (STI.isTargetCygMing()) - Symbol = "_alloca"; - else - Symbol = "_chkstk"; - + } else { + Symbol = "__probestack"; + } MachineInstrBuilder CI; // All current stack probes take AX and SP as input, clobber flags, and @@ -471,13 +525,13 @@ .addReg(SP, RegState::Define | RegState::Implicit) .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - if (Is64Bit) { + if (!STI.isTargetWin32()) { // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp // themselves. It also does not clobber %rax so we can reuse it when // adjusting %rsp. - BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP) - .addReg(X86::RSP) - .addReg(X86::RAX); + BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Is64Bit)), SP) + .addReg(SP) + .addReg(AX); } } @@ -641,7 +695,9 @@ X86FI->setCalleeSavedFrameSize( X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); - bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO()); + bool UseRedZone = false; + bool UseStackProbe = + (STI.isOSWindows() && !STI.isTargetMachO()) || MF.shouldProbeStack(); // The default stack probe size is 4096 if the function has no stackprobesize // attribute. @@ -661,12 +717,19 @@ !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. 
!IsWin64CC && // Win64 has no Red Zone + + !(UseStackProbe && StackSize > 128) && // Only use the Red Zone if we can + // fit the whole stack in it + // and thus stack probes won't be + // needed + !usesTheStack(MF) && // Don't push and pop. !MF.shouldSplitStack()) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); MFI->setStackSize(StackSize); + UseRedZone = true; } // Insert stack pointer adjustment for later moving of return addr. Only @@ -815,18 +878,26 @@ if (IsWin64Prologue && TRI->needsStackRealignment(MF)) AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign); if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { - // Check whether EAX is livein for this function. - bool isEAXAlive = isEAXLiveIn(MF); + assert(!UseRedZone && "The Red Zone is not accounted for in stack probes"); - if (isEAXAlive) { - // Sanity check that EAX is not livein for this function. - // It should not be, so throw an assert. - assert(!Is64Bit && "EAX is livein in x64 case!"); + // In the large code model, we have to load the stack probe function into a scratch + // register to call it. R11 is used for that. + bool SpillR11 = Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large; - // Save EAX - BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) - .addReg(X86::EAX, RegState::Kill) - .setMIFlag(MachineInstr::FrameSetup); + // We spill the registers we need to call the stack probe function. 
+ + bool RAXAlive, RBXAlive, R11Alive; + + pushRegForStackProbeCall(MF, MBB, MBBI, DL, RAXAlive, X86::RAX, NumBytes); + pushRegForStackProbeCall(MF, MBB, MBBI, DL, RBXAlive, X86::RBX, NumBytes); + if (SpillR11) { + pushRegForStackProbeCall(MF, MBB, MBBI, DL, R11Alive, X86::R11, NumBytes); + } + + if (!STI.isOSWindows()) { + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EDX) + .addImm(0x1000) + .setMIFlag(MachineInstr::FrameSetup); } if (Is64Bit) { @@ -846,11 +917,9 @@ .setMIFlag(MachineInstr::FrameSetup); } } else { - // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. - // We'll also use 4 already allocated bytes for EAX. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) - .setMIFlag(MachineInstr::FrameSetup); + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); } // Save a pointer to the MI where we set AX. @@ -864,14 +933,12 @@ for (; SetRAX != MBBI; ++SetRAX) SetRAX->setFlag(MachineInstr::FrameSetup); - if (isEAXAlive) { - // Restore EAX - MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), - X86::EAX), - StackPtr, false, NumBytes - 4); - MI->setFlag(MachineInstr::FrameSetup); - MBB.insert(MBBI, MI); + // Now we restore the spilled registers from the stack + if (SpillR11) { + popRegForStackProbeCall(MF, MBB, MBBI, DL, R11Alive, X86::R11, NumBytes); } + popRegForStackProbeCall(MF, MBB, MBBI, DL, RBXAlive, X86::RBX, NumBytes); + popRegForStackProbeCall(MF, MBB, MBBI, DL, RAXAlive, X86::RAX, NumBytes); } else if (NumBytes) { emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, /*InEpilogue=*/false); } Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -14903,7 +14903,7 @@ MachineFunction &MF = DAG.getMachineFunction(); bool SplitStack = MF.shouldSplitStack(); bool Lower = (Subtarget->isOSWindows() && !Subtarget->isTargetMachO()) || - 
SplitStack; + SplitStack || MF.shouldProbeStack(); SDLoc dl(Op); if (!Lower) { @@ -14981,6 +14981,14 @@ Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag); Flag = Chain.getValue(1); + + if (!Subtarget->isOSWindows()) { + const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RBX : X86::EBX); + + Chain = DAG.getCopyToReg(Chain, dl, Reg, DAG.getConstant(0x1000, dl, SPTy), Flag); + Flag = Chain.getValue(1); + } + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); @@ -20447,8 +20455,6 @@ MachineBasicBlock *BB) const { DebugLoc DL = MI->getDebugLoc(); - assert(!Subtarget->isTargetMachO()); - Subtarget->getFrameLowering()->emitStackProbeCall(*BB->getParent(), *BB, MI, DL); Index: test/CodeGen/X86/stack-probe-call-spills-EBX.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/stack-probe-call-spills-EBX.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck --check-prefix=X86-LINUX %s + +declare void @use([40096 x i8]*) + +; Ensure calls to __probestack spills EBX +define cc 10 i32 @spillBX(i32 %Base) "probe-stack" { + %array = alloca [40096 x i8], align 16 + call void @use([40096 x i8]* %array) + ret i32 %Base + +; X86-LINUX-LABEL: spillBX: +; X86-LINUX: pushl %ebx +; X86-LINUX: movl $4096, %edx # imm = 0x1000 +; X86-LINUX: movl $40124, %eax # imm = 0x9CBC +; X86-LINUX-NEXT: calll __probestack +; X86-LINUX-NEXT: subl %eax, %esp +; X86-LINUX-NEXT: movl 40120(%esp), %ebx + +} Index: test/CodeGen/X86/stack-probe-call-spills-RBX.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/stack-probe-call-spills-RBX.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=X64-LINUX %s + +declare void @use([40096 x i8]*) + +; Ensure calls to __probestack spills RBX +define cc 10 i32 @spillBX(i32 %Base, i32 %Sp, i32 %Hp, i32 %R1) 
"probe-stack" { + %array = alloca [40096 x i8], align 16 + call void @use([40096 x i8]* %array) + ret i32 %R1 + +; X64-LINUX-LABEL: spillBX: +; X64-LINUX: pushq %rbx +; X64-LINUX: movl $4096, %edx # imm = 0x1000 +; X64-LINUX: movl $40096, %eax # imm = 0x9CA0 +; X64-LINUX-NEXT: callq __probestack +; X64-LINUX-NEXT: subq %rax, %rsp +; X64-LINUX-NEXT: movq 40096(%rsp), %rbx + +} Index: test/CodeGen/X86/stack-probes.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/stack-probes.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck --check-prefix=X86-LINUX %s +; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=X64-LINUX %s + +declare void @use([40096 x i8]*) + +; Ensure calls to __probestack occur for large stack frames +define void @test() "probe-stack" { + %array = alloca [40096 x i8], align 16 + call void @use([40096 x i8]* %array) + ret void + +; X86-LINUX-LABEL: test: +; X86-LINUX: movl $4096, %edx # imm = 0x1000 +; X86-LINUX: movl $40124, %eax # imm = 0x9CBC +; X86-LINUX-NEXT: calll __probestack +; X86-LINUX-NEXT: subl %eax, %esp + +; X64-LINUX-LABEL: test: +; X64-LINUX: movl $4096, %edx # imm = 0x1000 +; X64-LINUX: movl $40104, %eax # imm = 0x9CA8 +; X64-LINUX-NEXT: callq __probestack +; X64-LINUX-NEXT: subq %rax, %rsp + +}