diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -135,6 +135,7 @@
                                      ///< enabled.
 CODEGENOPT(NoWarn , 1, 0) ///< Set when -Wa,--no-warn is enabled.
 CODEGENOPT(EnableSegmentedStacks , 1, 0) ///< Set when -fsplit-stack is enabled.
+CODEGENOPT(StackClashProtector, 1, 0) ///< Set when -fstack-clash-protection is enabled.
 CODEGENOPT(NoImplicitFloat , 1, 0) ///< Set when -mno-implicit-float is enabled.
 CODEGENOPT(NoInfsFPMath , 1, 0) ///< Assume FP arguments, results not +-Inf.
 CODEGENOPT(NoSignedZeros , 1, 0) ///< Allow ignoring the signedness of FP zero
diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
--- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -17,6 +17,9 @@
 def err_fe_inline_asm : Error<"%0">, CatInlineAsm;
 def warn_fe_inline_asm : Warning<"%0">, CatInlineAsm, InGroup<BackendInlineAsm>;
+def warn_fe_stack_clash_protection_inline_asm : Warning<
+  "Unable to protect inline asm that clobbers stack pointer against stack clash">,
+  CatInlineAsm, InGroup<BackendInlineAsm>;
 def note_fe_inline_asm : Note<"%0">, CatInlineAsm;
 def note_fe_inline_asm_here : Note<"instantiated into assembly here">;
 def err_fe_cannot_link_module : Error<"cannot link module '%0': %1">,
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -815,6 +815,9 @@
   StringRef getNormalizedGCCRegisterName(StringRef Name,
                                          bool ReturnCanonical = false) const;
 
+  /// Returns the target's stack pointer register name, or null if unknown.
+  virtual const char *getSPRegName() const { return nullptr; }
+
   /// Extracts a register from the passed constraint (if it is a
   /// single-register constraint) and the asm label expression related to a
   /// variable in the input or output list of an inline asm statement.
diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td
--- a/clang/include/clang/Driver/CC1Options.td
+++ b/clang/include/clang/Driver/CC1Options.td
@@ -736,6 +736,8 @@
   HelpText<"Enable stack protectors">;
 def stack_protector_buffer_size : Separate<["-"], "stack-protector-buffer-size">,
   HelpText<"Lower bound for a buffer to be considered for stack protection">;
+def stack_clash_protection : Flag<["-"], "stack-clash-protection">,
+  HelpText<"Enable stack clash protection">;
 def fvisibility : Separate<["-"], "fvisibility">,
   HelpText<"Default type and symbol visibility">;
 def ftype_visibility : Separate<["-"], "ftype-visibility">,
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1706,6 +1706,8 @@
 def fsplit_stack : Flag<["-"], "fsplit-stack">, Group<f_Group>;
 def fstack_protector_all : Flag<["-"], "fstack-protector-all">, Group<f_Group>,
   HelpText<"Enable stack protectors for all functions">;
+def fstack_clash_protection : Flag<["-"], "fstack-clash-protection">, Group<f_Group>,
+  HelpText<"Enable stack clash protection">;
 def fstack_protector_strong : Flag<["-"], "fstack-protector-strong">, Group<f_Group>,
   HelpText<"Enable stack protectors for some functions vulnerable to stack smashing. "
            "Compared to -fstack-protector, this uses a stronger heuristic "
" "Compared to -fstack-protector, this uses a stronger heuristic " diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -149,6 +149,8 @@ ArrayRef getGCCAddlRegNames() const override; + const char *getSPRegName() const override { return "rsp"; } + bool validateCpuSupports(StringRef Name) const override; bool validateCpuIs(StringRef Name) const override; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -476,6 +476,7 @@ Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS; Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath; Options.StackAlignmentOverride = CodeGenOpts.StackAlignment; + Options.StackClashProtector = CodeGenOpts.StackClashProtector; Options.FunctionSections = CodeGenOpts.FunctionSections; Options.DataSections = CodeGenOpts.DataSections; Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -10,14 +10,16 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" #include "CGDebugInfo.h" +#include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/PrettyStackTrace.h" #include "clang/Basic/TargetInfo.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -2229,8 +2231,14 @@ if (Clobber == "memory") ReadOnly = ReadNone = false; - else if (Clobber != "cc") + else if (Clobber != "cc") { Clobber = getTarget().getNormalizedGCCRegisterName(Clobber); + if (CGM.getCodeGenOpts().StackClashProtector && + Clobber == getTarget().getSPRegName()) { + CGM.getDiags().Report(S.getAsmLoc(), + diag::warn_fe_stack_clash_protection_inline_asm); + } + } if (!Constraints.empty()) Constraints += ','; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2603,6 +2603,29 @@ } } +static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args, + ArgStringList &CmdArgs) { + const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple(); + + for (const Arg *A : Args) { + switch (A->getOption().getID()) { + default: + continue; + case options::OPT_fstack_clash_protection: { + switch (EffectiveTriple.getArch()) { + default: + return; + case llvm::Triple::ArchType::x86: + case llvm::Triple::ArchType::x86_64: + break; + } + A->claim(); + CmdArgs.push_back("-stack-clash-protection"); + } + } + } +} + static void RenderTrivialAutoVarInitOptions(const Driver &D, const ToolChain &TC, const ArgList &Args, @@ -4722,6 +4745,7 @@ CmdArgs.push_back(Args.MakeArgString("-mspeculative-load-hardening")); RenderSSPOptions(TC, Args, CmdArgs, KernelOrKext); + RenderSCPOptions(TC, Args, CmdArgs); RenderTrivialAutoVarInitOptions(D, TC, Args, CmdArgs); // Translate -mstackrealign diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1213,6 +1213,8 @@ Opts.NoStackArgProbe = 
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1213,6 +1213,8 @@
   Opts.NoStackArgProbe = Args.hasArg(OPT_mno_stack_arg_probe);
 
+  Opts.StackClashProtector = Args.hasArg(OPT_stack_clash_protection);
+
   if (Arg *A = Args.getLastArg(OPT_fobjc_dispatch_method_EQ)) {
     StringRef Name = A->getValue();
     unsigned Method = llvm::StringSwitch<unsigned>(Name)
diff --git a/clang/test/CodeGen/stack-clash-protection.c b/clang/test/CodeGen/stack-clash-protection.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/stack-clash-protection.c
@@ -0,0 +1,41 @@
+// RUN: %clang -S -target x86_64 -o - %s -fstack-clash-protection | FileCheck %s
+// RUN: %clang -target x86_64 -o %t.out %s -fstack-clash-protection && %t.out
+
+#include <alloca.h>
+#include <string.h>
+
+// CHECK-LABEL: @main
+// static alloca instrumentation
+// CHECK: subq $4096
+// CHECK: mov
+// CHECK: subq $4096
+//
+// VLA instrumentation
+// CHECK: subq $4096, %rax
+// CHECK: subq $4096, %rsp
+//
+// dynamic alloca instrumentation (1)
+// CHECK: subq $4096, %rax
+// CHECK: subq $4096, %rsp
+//
+// dynamic alloca instrumentation (2)
+// CHECK: subq $4096, %rax
+// CHECK: subq $4096, %rsp
+//
+int main(int argc, char **argv) {
+  int volatile static_mem[8000];
+  for (size_t i = 0; i < argc * sizeof(static_mem) / sizeof(static_mem[0]); ++i)
+    static_mem[i] = argc * i;
+
+  int vla[argc];
+  memset(&vla[0], 0, argc);
+
+  // also check allocation of size 0
+  volatile void *mem = __builtin_alloca(argc - 1);
+
+  int volatile *dyn_mem = alloca(sizeof(static_mem) * argc);
+  for (size_t i = 0; i < argc * sizeof(static_mem) / sizeof(static_mem[0]); ++i)
+    dyn_mem[i] = argc * i;
+
+  return static_mem[(7999 * argc) / 2] - dyn_mem[(7999 * argc) / 2] + vla[argc - 1];
+}
diff --git a/clang/test/Driver/stack-clash-protection.c b/clang/test/Driver/stack-clash-protection.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Driver/stack-clash-protection.c
@@ -0,0 +1,22 @@
+// RUN: %clang -target i386-unknown-linux -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-i386
+// SCP-i386: "-stack-clash-protection"
+
+// RUN: %clang -target x86_64-scei-ps4 -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-x86
+// SCP-x86: "-stack-clash-protection"
+
+// RUN: %clang -target armv7k-apple-watchos2.0 -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-armv7
+// SCP-armv7-NOT: "-stack-clash-protection"
+
+// RUN: %clang -target x86_64-unknown-linux -fstack-clash-protection -c %s 2>&1 | FileCheck %s -check-prefix=SCP-warn
+// SCP-warn: warning: Unable to protect inline asm that clobbers stack pointer against stack clash
+
+int foo(int c) {
+  int r;
+  __asm__("sub %0, %%rsp"
+          :
+          : "rm"(c)
+          : "rsp");
+  __asm__("mov %%rsp, %0"
+          : "=rm"(r)::);
+  return r;
+}
diff --git a/llvm/include/llvm/CodeGen/CommandFlags.inc b/llvm/include/llvm/CodeGen/CommandFlags.inc
--- a/llvm/include/llvm/CodeGen/CommandFlags.inc
+++ b/llvm/include/llvm/CodeGen/CommandFlags.inc
@@ -205,6 +205,11 @@
                        cl::desc("Order local stack symbols."),
                        cl::init(true));
 
+static cl::opt<bool>
+    StackClashProtector("stack-clash-protector",
+                        cl::desc("Enable stack clash protection."),
+                        cl::init(false));
+
 static cl::opt<unsigned>
     OverrideStackAlignment("stack-alignment",
                            cl::desc("Override default stack alignment"),
@@ -295,6 +300,7 @@
   Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt;
   Options.StackAlignmentOverride = OverrideStackAlignment;
   Options.StackSymbolOrdering = StackSymbolOrdering;
+  Options.StackClashProtector = StackClashProtector;
   Options.UseInitArray = !UseCtors;
   Options.RelaxELFRelocations = RelaxELFRelocations;
   Options.DataSections = DataSections;
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -111,7 +111,8 @@
           NoSignedZerosFPMath(false),
           HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
           GuaranteedTailCallOpt(false), StackSymbolOrdering(true),
-          EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false),
+          StackClashProtector(false), EnableFastISel(false),
+          EnableGlobalISel(false), UseInitArray(false),
           DisableIntegratedAS(false), RelaxELFRelocations(false),
           FunctionSections(false), DataSections(false), UniqueSectionNames(true),
           TrapUnreachable(false),
@@ -192,6 +193,10 @@
     /// they were generated. Default is true.
     unsigned StackSymbolOrdering : 1;
 
+    /// Emit code that probes each page of a stack frame as it is allocated,
+    /// to mitigate stack clash attacks. Default is false.
+    unsigned StackClashProtector : 1;
+
     /// EnableFastISel - This flag enables fast-path instruction selection
    /// which trades away generated code quality in favor of reducing
     /// compile time.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -128,6 +128,13 @@
   void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                     const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const;
 
+  /// Starting from \p MBBI, look for a store to the stack that can double as
+  /// a stack probe for an offset within [\p MinOffset, \p MaxOffset] of the
+  /// stack pointer; returns MBB.end() if none is found.
+  MachineBasicBlock::iterator
+  findFreeStackProbe(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                     int64_t MinOffset, int64_t MaxOffset) const;
+
   /// Check that LEA can be used on SP in an epilogue sequence for \p MF.
   bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const;
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -343,6 +343,13 @@
   }
 }
 
+// Maps opcodes that store to stack memory to the index of their displacement
+// (offset) operand.
+const DenseMap<unsigned, unsigned> OpcodeToSPOperandIndex = {
+    {X86::MOV8mi, 3}, {X86::MOV16mi, 3}, {X86::MOV32mi, 3}, {X86::MOV64mi32, 3},
+    {X86::MOV8mr, 3}, {X86::MOV16mr, 3}, {X86::MOV32mr, 3}, {X86::MOV64mr, 3},
+};
+
 MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
@@ -381,16 +386,81 @@
   } else {
     bool IsSub = Offset < 0;
     uint64_t AbsOffset = IsSub ? -Offset : Offset;
-    unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
-                         : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
-    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
-             .addReg(StackPtr)
-             .addImm(AbsOffset);
-    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
+                               : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
+    const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+    const uint64_t PageSize = 0x1000;
+    uint64_t CurrentAbsOffset = 0;
+    const bool StackClashProtector =
+        MBB.getParent()->getTarget().Options.StackClashProtector;
+    if (StackClashProtector && !InEpilogue) {
+      // Grow the stack at most one page at a time, making sure each freshly
+      // allocated page is probed, either by an existing ("free") store found
+      // by findFreeStackProbe or by an explicit write of zero.
+      while (CurrentAbsOffset < AbsOffset) {
+        uint64_t ChunkSize = std::min(AbsOffset - CurrentAbsOffset, PageSize);
+        MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+                 .addReg(StackPtr)
+                 .addImm(ChunkSize);
+        CurrentAbsOffset += ChunkSize;
+        MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
+        auto FreeProbeIterator =
+            findFreeStackProbe(MBB, MBBI, AbsOffset - CurrentAbsOffset,
+                               AbsOffset - CurrentAbsOffset + PageSize);
+        if (FreeProbeIterator != MBB.end()) {
+          MachineInstr &FreeProbe = *FreeProbeIterator;
+          unsigned OffsetIndex =
+              OpcodeToSPOperandIndex.find(FreeProbe.getOpcode())->second;
+          FreeProbe.getOperand(OffsetIndex)
+              .setImm(FreeProbe.getOperand(OffsetIndex).getImm() -
+                      (AbsOffset - CurrentAbsOffset));
+          MBBI = std::next(FreeProbeIterator);
+        } else {
+          addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)), StackPtr,
+                       false, 0)
+              .addImm(0);
+        }
+      }
+    } else {
+      MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+               .addReg(StackPtr)
+               .addImm(AbsOffset);
+      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+    }
   }
   return MI;
 }
 
+MachineBasicBlock::iterator X86FrameLowering::findFreeStackProbe(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int64_t MinOffset,
+    int64_t MaxOffset) const {
+  for (; MBBI != MBB.end(); ++MBBI) {
+    MachineInstr &MI = *MBBI;
+    auto StackOpCode = OpcodeToSPOperandIndex.find(MI.getOpcode());
+    if (StackOpCode != OpcodeToSPOperandIndex.end()) {
+      auto &Dst = MI.getOperand(0);
+      if (!Dst.isFI())
+        continue;
+      auto &MOOffset = MI.getOperand(StackOpCode->second);
+      if (MOOffset.isImm()) {
+        int64_t Offset = MOOffset.getImm();
+        if (MinOffset <= Offset && Offset <= MaxOffset) {
+          return MBBI;
+        }
+        continue;
+      }
+      // We do not know where this store writes; stop scanning.
+      break;
+    }
+    if (llvm::any_of(MI.operands(),
+                     [](MachineOperand &MO) { return MO.isFI(); })) {
+      break; // Effect on the stack is not modeled; stop scanning.
+    }
+  }
+  return MBB.end();
+}
+
 int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator &MBBI,
                                      bool doMergeWithPrevious) const {
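Taken together, the BuildStackAdjustment and findFreeStackProbe changes above implement chunked, probed stack growth for fixed-size frames. A minimal C sketch of the emitted prologue logic, assuming a 4096-byte page; all names below are illustrative, not from the patch:

  #include <stdint.h>

  #define PAGE_SIZE 4096u

  /* Stand-in for findFreeStackProbe: reports whether an existing store hits
     [sp + min_off, sp + max_off], in which case that store doubles as the
     probe for the page that was just allocated. */
  extern int has_free_probe(uint64_t min_off, uint64_t max_off);

  void build_stack_adjustment(uint64_t abs_offset, volatile char *sp) {
    uint64_t done = 0;
    while (done < abs_offset) {
      uint64_t remaining = abs_offset - done;
      uint64_t chunk = remaining < PAGE_SIZE ? remaining : PAGE_SIZE;
      sp -= chunk;                                  /* subq $chunk, %rsp */
      done += chunk;
      if (!has_free_probe(abs_offset - done, abs_offset - done + PAGE_SIZE))
        *sp = 0;                                    /* movq $0, (%rsp) */
    }
  }

Reusing a "free" probe is what lets the tests below accept an ordinary store in place of an explicit movq $0 on some pages.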
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -537,6 +537,10 @@
       // falls back to heap allocation if not.
       SEG_ALLOCA,
 
+      // For allocating stack space when using stack clash protection.
+      // Allocation is performed by block, and each block is probed.
+      PROBED_ALLOCA,
+
       // Memory barriers.
       MEMBARRIER,
       MFENCE,
@@ -1439,6 +1443,9 @@
     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                             MachineBasicBlock *BB) const;
 
+    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
+                                               MachineBasicBlock *BB) const;
+
     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22164,7 +22164,17 @@
   MVT SPTy = getPointerTy(DAG.getDataLayout());
 
   SDValue Result;
-  if (!Lower) {
+  const bool StackClashProtector = MF.getTarget().Options.StackClashProtector;
+  if (StackClashProtector) {
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+
+    const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
+    Register Vreg = MRI.createVirtualRegister(AddrRegClass);
+    Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
+    Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
+                         DAG.getRegister(Vreg, SPTy));
+
+  } else if (!Lower) {
     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
     unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
     assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
@@ -28702,6 +28712,8 @@
   case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
   case X86ISD::MFENCE: return "X86ISD::MFENCE";
   case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
+  case X86ISD::PROBED_ALLOCA:
+    return "X86ISD::PROBED_ALLOCA";
   case X86ISD::SAHF: return "X86ISD::SAHF";
   case X86ISD::RDRAND: return "X86ISD::RDRAND";
   case X86ISD::RDSEED: return "X86ISD::RDSEED";
@@ -29959,6 +29971,93 @@
 }
 
 MachineBasicBlock *
+X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
+                                           MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+
+  const bool Is64Bit = Subtarget.is64Bit();
+  const bool IsLP64 = Subtarget.isTarget64BitLP64();
+
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineBasicBlock *testMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+  MachineFunction::iterator MBBIter = ++BB->getIterator();
+  MF->insert(MBBIter, testMBB);
+  MF->insert(MBBIter, blockMBB);
+  MF->insert(MBBIter, tailMBB);
+
+  unsigned sizeVReg = MI.getOperand(1).getReg();
+
+  const TargetRegisterClass *SizeRegClass = MRI.getRegClass(sizeVReg);
+
+  unsigned tmpSizeVReg = MRI.createVirtualRegister(SizeRegClass);
+  unsigned tmpSizeVReg2 = MRI.createVirtualRegister(SizeRegClass);
+
+  unsigned physSPReg =
+      IsLP64 || Subtarget.isTargetNaCl64() ? X86::RSP : X86::ESP;
+
+  // testMBB: if less than a page remains to be allocated, jump to tailMBB.
+  BuildMI(testMBB, DL, TII->get(X86::PHI), tmpSizeVReg)
+      .addReg(sizeVReg)
+      .addMBB(BB)
+      .addReg(tmpSizeVReg2)
+      .addMBB(blockMBB);
+
+  BuildMI(testMBB, DL, TII->get(IsLP64 ? X86::CMP64ri32 : X86::CMP32ri))
+      .addReg(tmpSizeVReg)
+      .addImm(4096);
+
+  BuildMI(testMBB, DL, TII->get(X86::JCC_1))
+      .addMBB(tailMBB)
+      .addImm(X86::COND_L);
+  testMBB->addSuccessor(blockMBB);
+  testMBB->addSuccessor(tailMBB);
+
+  // blockMBB: allocate one page and probe it.
+
+  BuildMI(blockMBB, DL, TII->get(IsLP64 ? X86::SUB64ri32 : X86::SUB32ri),
+          tmpSizeVReg2)
+      .addReg(tmpSizeVReg)
+      .addImm(4096);
+
+  BuildMI(blockMBB, DL, TII->get(IsLP64 ? X86::SUB64ri32 : X86::SUB32ri),
+          physSPReg)
+      .addReg(physSPReg)
+      .addImm(4096);
+
+  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+  addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
+      .addImm(0);
+
+  BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB);
+  blockMBB->addSuccessor(testMBB);
+
+  // tailMBB: allocate the sub-page remainder and continue.
+  BuildMI(tailMBB, DL, TII->get(IsLP64 ? X86::SUB64rr : X86::SUB32rr),
+          physSPReg)
+      .addReg(physSPReg)
+      .addReg(tmpSizeVReg);
+  BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
+      .addReg(physSPReg);
+
+  tailMBB->splice(tailMBB->end(), BB,
+                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
+  tailMBB->transferSuccessorsAndUpdatePHIs(BB);
+  BB->addSuccessor(testMBB);
+
+  // Delete the original pseudo instruction.
+  MI.eraseFromParent();
+
+  // And we're done.
+  return tailMBB;
+}
+
+MachineBasicBlock *
 X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
                                         MachineBasicBlock *BB) const {
   MachineFunction *MF = BB->getParent();
@@ -31133,6 +31232,9 @@
   case X86::SEG_ALLOCA_32:
   case X86::SEG_ALLOCA_64:
     return EmitLoweredSegAlloca(MI, BB);
+  case X86::PROBED_ALLOCA_32:
+  case X86::PROBED_ALLOCA_64:
+    return EmitLoweredProbedAlloca(MI, BB);
   case X86::TLSCall_32:
   case X86::TLSCall_64:
     return EmitLoweredTLSCall(MI, BB);
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -111,6 +111,23 @@
                       [(set GR64:$dst,
                          (X86SegAlloca GR64:$size))]>,
                     Requires<[In64BitMode]>;
+
+// To protect against stack clash, dynamic allocation should perform a memory
+// probe at each page.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+def PROBED_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
+                         "# variable sized alloca with probing",
+                         [(set GR32:$dst,
+                            (X86ProbedAlloca GR32:$size))]>,
+                       Requires<[NotLP64]>;
+
+let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
+def PROBED_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
+                         "# variable sized alloca with probing",
+                         [(set GR64:$dst,
+                            (X86ProbedAlloca GR64:$size))]>,
+                       Requires<[In64BitMode]>;
 }
 
 // Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -121,6 +121,8 @@
 def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
 
+def SDT_X86PROBED_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+
 def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
 def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
@@ -292,6 +294,9 @@
 def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
                           [SDNPHasChain]>;
 
+def X86ProbedAlloca : SDNode<"X86ISD::PROBED_ALLOCA", SDT_X86PROBED_ALLOCA,
+                             [SDNPHasChain]>;
+
 def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
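The three basic blocks created by EmitLoweredProbedAlloca form a page-sized allocation loop. A C sketch of the emitted control flow, with `sp` standing in for the stack pointer (illustrative only; the signedness of the comparison is simplified):

  #include <stdint.h>

  void *probed_alloca(uint64_t size, volatile char *sp) {
    while (size >= 4096) { /* testMBB: cmp $4096; jl tailMBB */
      size -= 4096;        /* blockMBB: subq $4096, tmpSize  */
      sp -= 4096;          /*           subq $4096, %rsp     */
      *sp = 0;             /*           movq $0, (%rsp): probe the new page */
    }                      /*           jmp testMBB          */
    sp -= size;            /* tailMBB: subq tmpSize, %rsp    */
    return (void *)sp;     /* copy %rsp into the result vreg */
  }

This guarantees the stack pointer never skips past the guard page, whatever the runtime value of `size`.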
diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
@@ -0,0 +1,16 @@
+; RUN: llc --stack-clash-protector < %s | FileCheck %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: foo:
+define i32 @foo(i32 %n) local_unnamed_addr {
+; CHECK: subq $4096, %rsp
+; CHECK: movq $0, (%rsp)
+  %a = alloca i32, i32 %n, align 16
+  %b = getelementptr inbounds i32, i32* %a, i64 1198
+  store volatile i32 1, i32* %b
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
diff --git a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll
@@ -0,0 +1,23 @@
+; RUN: llc --stack-clash-protector < %s | FileCheck %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: foo:
+define i32 @foo() local_unnamed_addr {
+; two stack growths, with an extra probe in between and a natural probe afterward
+; CHECK: subq
+; CHECK-NOT: movq $0
+; CHECK: mov
+; CHECK: subq
+; CHECK-NOT: movq $0
+; CHECK: mov
+  %a = alloca i32, i64 2000, align 16
+  %b0 = getelementptr inbounds i32, i32* %a, i64 98
+  %b1 = getelementptr inbounds i32, i32* %a, i64 1198
+  store volatile i32 1, i32* %b0
+  store volatile i32 1, i32* %b1
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
diff --git a/llvm/test/CodeGen/X86/stack-clash-medium.ll b/llvm/test/CodeGen/X86/stack-clash-medium.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/stack-clash-medium.ll
@@ -0,0 +1,19 @@
+; RUN: llc --stack-clash-protector < %s | FileCheck %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: foo:
+define i32 @foo() local_unnamed_addr {
+; two stack growths, with a natural probe in between and an extra probe afterward
+; CHECK: subq
+; CHECK: movl
+; CHECK: subq
+; CHECK: movq $0
+  %a = alloca i32, i64 2000, align 16
+  %b = getelementptr inbounds i32, i32* %a, i64 1198
+  store volatile i32 1, i32* %b
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
diff --git a/llvm/test/CodeGen/X86/stack-clash-small.ll b/llvm/test/CodeGen/X86/stack-clash-small.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/stack-clash-small.ll
@@ -0,0 +1,17 @@
+; RUN: llc --stack-clash-protector < %s | FileCheck %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: foo:
+define i32 @foo() local_unnamed_addr {
+; only one stack growth, no probe needed beyond it
+; CHECK: subq
+; CHECK-NOT: subq
+  %a = alloca i32, i64 100, align 16
+  %b = getelementptr inbounds i32, i32* %a, i64 98
+  store volatile i32 1, i32* %b
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
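Finally, an end-to-end usage sketch (not part of the patch's test suite); the expected prologue shape follows the CHECK patterns above, modulo probes elided when a nearby store already touches the page:

  /* big_frame.c -- hypothetical example.
     Compile: clang -S -fstack-clash-protection big_frame.c
     Expected prologue shape on x86-64:
       subq $4096, %rsp
       movq $0, (%rsp)    # one probe per page, unless an existing store is reused
       subq $4096, %rsp
       ... */
  #include <string.h>

  int big_frame(void) {
    char buf[4 * 4096];         /* four pages of locals */
    memset(buf, 1, sizeof buf);
    return buf[42];
  }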