Index: include/llvm/CodeGen/CommandFlags.h
===================================================================
--- include/llvm/CodeGen/CommandFlags.h
+++ include/llvm/CodeGen/CommandFlags.h
@@ -191,10 +191,16 @@
           cl::desc("Assume the creation of a position independent executable."),
           cl::init(false));
 
-cl::opt<bool>
+cl::opt<SplitStacks::SplitStacksMode>
 SegmentedStacks("segmented-stacks",
                 cl::desc("Use segmented stacks if possible."),
-                cl::init(false));
+                cl::init(SplitStacks::PerFn),
+                cl::values(
+                  clEnumValN(SplitStacks::PerFn, "per-fn",
+                             "Enable segmented stacks on a per-fn basis with a 'split-stack' attr."),
+                  clEnumValN(SplitStacks::Global, "global",
+                             "Enable segmented stacks globally."),
+                  clEnumValEnd));
 
 cl::opt<bool>
 UseInitArray("use-init-array",
Index: include/llvm/CodeGen/MachineFunction.h
===================================================================
--- include/llvm/CodeGen/MachineFunction.h
+++ include/llvm/CodeGen/MachineFunction.h
@@ -260,6 +260,9 @@
     return MBBNumbering[N];
   }
 
+  /// Should we be emitting segmented stack code for this function?
+  bool shouldSplitStack();
+
   /// getNumBlockIDs - Return the number of MBB ID's allocated.
   ///
   unsigned getNumBlockIDs() const { return (unsigned)MBBNumbering.size(); }
Index: include/llvm/Target/TargetOptions.h
===================================================================
--- include/llvm/Target/TargetOptions.h
+++ include/llvm/Target/TargetOptions.h
@@ -38,6 +38,13 @@
     };
   }
 
+  namespace SplitStacks {
+    enum SplitStacksMode {
+      PerFn, // Enable segmented stacks on a per-fn basis with a 'split-stack' attr
+      Global // Enable segmented stacks globally
+    };
+  }
+
   class TargetOptions {
   public:
     TargetOptions()
@@ -48,9 +55,9 @@
           UseSoftFloat(false), NoZerosInBSS(false),
           JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false),
           GuaranteedTailCallOpt(false), DisableTailCalls(false),
-          StackAlignmentOverride(0),
+          EnableSegmentedStacks(SplitStacks::PerFn), StackAlignmentOverride(0),
           EnableFastISel(false), PositionIndependentExecutable(false),
-          EnableSegmentedStacks(false), UseInitArray(false), TrapFuncName(""),
+          UseInitArray(false), TrapFuncName(""),
           FloatABIType(FloatABI::Default),
           AllowFPOpFusion(FPOpFusion::Standard)
     {}
@@ -138,6 +145,10 @@
     /// Disabling them may be useful to maintain a correct call stack.
     unsigned DisableTailCalls : 1;
 
+    /// EnableSegmentedStacks - Selects whether to enable segmented stacks
+    /// globally or per function (only those with the 'split-stack' attribute).
+    SplitStacks::SplitStacksMode EnableSegmentedStacks;
+
     /// StackAlignmentOverride - Override default stack alignment for target.
     unsigned StackAlignmentOverride;
 
@@ -152,8 +163,6 @@
     /// if the relocation model is anything other than PIC.
     unsigned PositionIndependentExecutable : 1;
 
-    unsigned EnableSegmentedStacks : 1;
-
     /// UseInitArray - Use .init_array instead of .ctors for static
     /// constructors.
     unsigned UseInitArray : 1;
Index: lib/CodeGen/MachineFunction.cpp
===================================================================
--- lib/CodeGen/MachineFunction.cpp
+++ lib/CodeGen/MachineFunction.cpp
@@ -124,6 +124,12 @@
   return JumpTableInfo;
 }
 
+/// Should we be emitting segmented stack code for this function?
+bool MachineFunction::shouldSplitStack() {
+  return getTarget().Options.EnableSegmentedStacks == SplitStacks::Global ||
+         getFunction()->hasFnAttribute("split-stack");
+}
+
 /// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
 /// recomputes them.  This guarantees that the MBB numbers are sequential,
 /// dense, and match the ordering of the blocks within the function.  If a
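
For reference, a minimal sketch of how the two modes are meant to be driven
(the file and function names here are illustrative, not part of the patch):
with the new default of -segmented-stacks=per-fn, shouldSplitStack() returns
true only for functions carrying the "split-stack" string attribute, while
-segmented-stacks=global keeps the old behaviour of adjusting every function.

    ; split.ll (hypothetical example, mirroring the updated tests below)
    ;
    ; Per-function mode (the default):  llc split.ll
    ;   -> only @grows_stack gets the segmented-stack prologue
    ; Global mode:                      llc -segmented-stacks=global split.ll
    ;   -> both functions get the segmented-stack prologue

    declare void @dummy_use(i32*, i32)

    define void @grows_stack() #0 {
      %mem = alloca i32, i32 10
      call void @dummy_use(i32* %mem, i32 10)
      ret void
    }

    define void @plain() {
      %mem = alloca i32, i32 10
      call void @dummy_use(i32* %mem, i32 10)
      ret void
    }

    attributes #0 = { "split-stack" }
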
Index: lib/CodeGen/PrologEpilogInserter.cpp
===================================================================
--- lib/CodeGen/PrologEpilogInserter.cpp
+++ lib/CodeGen/PrologEpilogInserter.cpp
@@ -707,7 +707,7 @@
   // we've been asked for it.  This, when linked with a runtime with support
   // for segmented stacks (libgcc is one), will result in allocating stack
   // space in small chunks instead of one large contiguous block.
-  if (Fn.getTarget().Options.EnableSegmentedStacks)
+  if (Fn.shouldSplitStack())
     TFI.adjustForSegmentedStacks(Fn);
 
   // Emit additional code that is required to explicitly handle the stack in
Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -439,7 +439,7 @@
       !MFI->adjustsStack() &&                           // No calls.
       !IsWin64 &&                                       // Win64 has no Red Zone
       !usesTheStack(MF) &&                              // Don't push and pop.
-      !MF.getTarget().Options.EnableSegmentedStacks) {  // Regular stack
+      !MF.shouldSplitStack()) {                         // Regular stack
     uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
     if (HasFP) MinSize += SlotSize;
     StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -631,15 +631,8 @@
   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
 
-  if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
-    setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
-                       MVT::i64 : MVT::i32, Custom);
-  else if (TM.Options.EnableSegmentedStacks)
-    setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
-                       MVT::i64 : MVT::i32, Custom);
-  else
-    setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
-                       MVT::i64 : MVT::i32, Expand);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+                     MVT::i64 : MVT::i32, Custom);
 
   if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
     // f32 and f64 use SSE.
@@ -10739,13 +10732,51 @@
 SDValue
 X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                            SelectionDAG &DAG) const {
-  assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() ||
-          getTargetMachine().Options.EnableSegmentedStacks) &&
-         "This should be used only on Windows targets or when segmented stacks "
-         "are being used");
-  assert(!Subtarget->isTargetEnvMacho() && "Not implemented");
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  bool SplitStack = MF.shouldSplitStack();
+  bool Lower = (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) ||
+               SplitStack;
   SDLoc dl(Op);
 
+  if (!Lower) {
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    SDNode *Node = Op.getNode();
+
+    unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+    assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+           " not tell us which reg is the stack pointer!");
+    EVT VT = Node->getValueType(0);
+    SDValue Tmp1 = SDValue(Node, 0);
+    SDValue Tmp2 = SDValue(Node, 1);
+    SDValue Tmp3 = Node->getOperand(2);
+    SDValue Chain = Tmp1.getOperand(0);
+
+    // Chain the dynamic stack allocation so that it doesn't modify the stack
+    // pointer when other instructions are using the stack.
+    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true),
+                                 SDLoc(Node));
+
+    SDValue Size = Tmp2.getOperand(1);
+    SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+    Chain = SP.getValue(1);
+    unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+    const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+    unsigned StackAlign = TFI.getStackAlignment();
+    Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size);       // Value
+    if (Align > StackAlign)
+      Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
+                         DAG.getConstant(-(uint64_t)Align, VT));
+    Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1);     // Output chain
+
+    Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+                              DAG.getIntPtrConstant(0, true), SDValue(),
+                              SDLoc(Node));
+
+    SDValue Ops[2] = { Tmp1, Tmp2 };
+    return DAG.getMergeValues(Ops, 2, dl);
+  }
+
   // Get the inputs.
   SDValue Chain = Op.getOperand(0);
   SDValue Size  = Op.getOperand(1);
@@ -10755,8 +10786,7 @@
   bool Is64Bit = Subtarget->is64Bit();
   EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
 
-  if (getTargetMachine().Options.EnableSegmentedStacks) {
-    MachineFunction &MF = DAG.getMachineFunction();
+  if (SplitStack) {
     MachineRegisterInfo &MRI = MF.getRegInfo();
 
     if (Is64Bit) {
@@ -15275,7 +15305,7 @@
   MachineFunction *MF = BB->getParent();
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
 
-  assert(getTargetMachine().Options.EnableSegmentedStacks);
+  assert(MF->shouldSplitStack());
 
   unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
   unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
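
Since ISD::DYNAMIC_STACKALLOC is now always marked Custom on x86, a plain
dynamic alloca (no "split-stack" attribute, shouldSplitStack() false, target
not COFF) is handled by the new !Lower branch above, which mirrors the generic
expansion: wrap the allocation in CALLSEQ_START/CALLSEQ_END, subtract the size
from the stack pointer, and re-align only if the alloca is over-aligned. A
hypothetical input that exercises that branch (not part of the patch's tests):

    declare void @dummy_use(i32*, i32)

    ; No "split-stack" attribute, so the dynamic alloca goes down the
    ; inline (!Lower) expansion path in LowerDYNAMIC_STACKALLOC.
    define void @plain_dynamic_alloca(i32 %n) {
      %mem = alloca i32, i32 %n
      call void @dummy_use(i32* %mem, i32 %n)
      ret void
    }
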
Index: test/CodeGen/X86/segmented-stacks-dynamic.ll
===================================================================
--- test/CodeGen/X86/segmented-stacks-dynamic.ll
+++ test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
 
-define i32 @test_basic(i32 %l) {
+define i32 @test_basic(i32 %l) #0 {
   %mem = alloca i32, i32 %l
   call void @dummy_use (i32* %mem, i32 %l)
   %terminate = icmp eq i32 %l, 0
@@ -62,3 +62,5 @@
 ; X64: movq %rax, %rdi
 
 }
+
+attributes #0 = { "split-stack" }
Index: test/CodeGen/X86/segmented-stacks.ll
===================================================================
--- test/CodeGen/X86/segmented-stacks.ll
+++ test/CodeGen/X86/segmented-stacks.ll
@@ -1,23 +1,23 @@
-; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
-; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
-; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-FreeBSD
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -verify-machineinstrs | FileCheck %s -check-prefix=X64-FreeBSD
 
 ; We used to crash with filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -filetype=obj
 
-; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris -segmented-stacks 2> %t.log
+; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris 2> %t.log
 ; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-Solaris
-; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -segmented-stacks 2> %t.log
+; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-mingw32 2> %t.log
 ; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-MinGW
-; RUN: not llc < %s -mcpu=generic -mtriple=i686-freebsd -segmented-stacks 2> %t.log
+; RUN: not llc < %s -mcpu=generic -mtriple=i686-freebsd 2> %t.log
 ; RUN: FileCheck %s -input-file=%t.log -check-prefix=X32-FreeBSD
 
 ; X64-Solaris: Segmented stacks not supported on this platform
@@ -27,7 +27,7 @@
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
 
-define void @test_basic() {
+define void @test_basic() #0 {
   %mem = alloca i32, i32 10
   call void @dummy_use (i32* %mem, i32 10)
   ret void
@@ -95,7 +95,7 @@
 
 }
 
-define i32 @test_nested(i32 * nest %closure, i32 %other) {
+define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
   %addend = load i32 * %closure
   %result = add i32 %other, %addend
   ret i32 %result
@@ -157,7 +157,7 @@
 
 }
 
-define void @test_large() {
+define void @test_large() #0 {
   %mem = alloca i32, i32 10000
   call void @dummy_use (i32* %mem, i32 0)
   ret void
@@ -219,7 +219,7 @@
 
 }
 
-define fastcc void @test_fastcc() {
+define fastcc void @test_fastcc() #0 {
   %mem = alloca i32, i32 10
   call void @dummy_use (i32* %mem, i32 10)
   ret void
@@ -287,7 +287,7 @@
 
 }
 
-define fastcc void @test_fastcc_large() {
+define fastcc void @test_fastcc_large() #0 {
   %mem = alloca i32, i32 10000
   call void @dummy_use (i32* %mem, i32 0)
   ret void
@@ -361,7 +361,7 @@
 
 }
 
-define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) {
+define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) #0 {
   %mem = alloca i32, i32 10000
   call void @dummy_use (i32* %mem, i32 %a)
   ret void
@@ -383,3 +383,5 @@
 ; X32-Darwin-NEXT: ret
 
 }
+
+attributes #0 = { "split-stack" }