Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1360,6 +1360,12 @@
   /// getIRStackGuard returns nullptr.
   virtual Value *getSDagStackGuard(const Module &M) const;
 
+  /// If this function returns true, stack protection checks should XOR the
+  /// frame pointer (or whichever pointer is used to address locals) into the
+  /// stack guard value before checking it. getIRStackGuard must return nullptr
+  /// if this returns true.
+  virtual bool useStackGuardXorFP() const { return false; }
+
   /// If the target has a standard stack protection check function that
   /// performs validation and error handling, returns the function. Otherwise,
   /// returns nullptr. Must be previously inserted by insertSSPDeclarations.
@@ -3487,6 +3493,11 @@
     return false;
   }
 
+  virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
+                                      const SDLoc &DL) const {
+    llvm_unreachable("not implemented for this target");
+  }
+
   /// Lower TLS global address SDNode for target independent emulated TLS model.
   virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                           SelectionDAG &DAG) const;
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2146,11 +2146,14 @@
   unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
 
   // Generate code to load the content of the guard slot.
-  SDValue StackSlot = DAG.getLoad(
+  SDValue GuardVal = DAG.getLoad(
       PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
       MachineMemOperand::MOVolatile);
 
+  if (TLI.useStackGuardXorFP())
+    GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
+
   // Retrieve guard check function, nullptr if instrumentation is inlined.
   if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
     // The target provides a guard check function to validate the guard value.
@@ -2162,7 +2165,7 @@
     TargetLowering::ArgListTy Args;
     TargetLowering::ArgListEntry Entry;
-    Entry.Node = StackSlot;
+    Entry.Node = GuardVal;
     Entry.Ty = FnTy->getParamType(0);
     if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
       Entry.IsInReg = true;
@@ -2195,7 +2198,7 @@
   // Perform the comparison via a subtract/getsetcc.
   EVT VT = Guard.getValueType();
-  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot);
+  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);
 
   SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
                                                         *DAG.getContext(),
@@ -2205,7 +2208,7 @@
   // If the sub is not 0, then we know the guard/stackslot do not equal, so
   // branch to failure MBB.
   SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
-                               MVT::Other, StackSlot.getOperand(0),
+                               MVT::Other, GuardVal.getOperand(0),
                                Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
   // Otherwise branch to success MBB.
   SDValue Br = DAG.getNode(ISD::BR, dl,
@@ -5644,6 +5647,8 @@
                         MachinePointerInfo(Global, 0), Align,
                         MachineMemOperand::MOVolatile);
   }
+  if (TLI.useStackGuardXorFP())
+    Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
   DAG.setRoot(Chain);
   setValue(&I, Res);
   return nullptr;
Index: llvm/lib/CodeGen/StackProtector.cpp
===================================================================
--- llvm/lib/CodeGen/StackProtector.cpp
+++ llvm/lib/CodeGen/StackProtector.cpp
@@ -385,8 +385,13 @@
 /// - The epilogue checks the value stored in the prologue against the original
 ///   value. It calls __stack_chk_fail if they differ.
 bool StackProtector::InsertStackProtectors() {
+  // If the target wants to XOR the frame pointer into the guard value, it's
+  // impossible to emit the check in IR, so the target *must* support stack
+  // protection in SDAG, and its fast isel 'ret' selector must implement or
+  // fall back in the presence of stack protection.
   bool SupportsSelectionDAGSP =
-      EnableSelectionDAGSP && !TM->Options.EnableFastISel;
+      TLI->useStackGuardXorFP() ||
+      (EnableSelectionDAGSP && !TM->Options.EnableFastISel);
   AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
   for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
Index: llvm/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.h
+++ llvm/lib/Target/X86/X86ISelLowering.h
@@ -1055,9 +1055,13 @@
     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
 
     bool useLoadStackGuardNode() const override;
+    bool useStackGuardXorFP() const override;
     void insertSSPDeclarations(Module &M) const override;
     Value *getSDagStackGuard(const Module &M) const override;
     Value *getSSPStackGuardCheck(const Module &M) const override;
+    SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
+                                const SDLoc &DL) const override;
+
     /// Return true if the target stores SafeStack pointer at a fixed offset in
     /// some non-standard address space, and populates the address space and
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1685,6 +1685,19 @@
   return Subtarget.isTargetMachO() && Subtarget.is64Bit();
 }
 
+bool X86TargetLowering::useStackGuardXorFP() const {
+  // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
+  return Subtarget.getTargetTriple().isOSMSVCRT();
+}
+
+SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
+                                               const SDLoc &DL) const {
+  EVT PtrTy = getPointerTy(DAG.getDataLayout());
+  unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
+  MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
+  return SDValue(Node, 0);
+}
+
 TargetLoweringBase::LegalizeTypeAction
 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
   if (ExperimentalVectorWideningLegalization &&
Index: llvm/lib/Target/X86/X86InstrCompiler.td
===================================================================
--- llvm/lib/Target/X86/X86InstrCompiler.td
+++ llvm/lib/Target/X86/X86InstrCompiler.td
@@ -142,6 +142,15 @@
                       [(X86WinAlloca GR64:$size)]>,
                       Requires<[In64BitMode]>;
 
+// These instructions XOR the frame pointer into a GPR. They are used in some
+// stack protection schemes. These are post-RA pseudos because we only know the
+// frame register after register allocation.
+let Constraints = "$src = $dst", isPseudo = 1, Defs = [EFLAGS] in {
+  def XOR32_FP : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src),
+                   "xorl\t$$FP, $src", []>, Requires<[NotLP64]>;
+  def XOR64_FP : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src),
+                   "xorq\t$$FP, $src", []>, Requires<[In64BitMode]>;
+}
 
 //===----------------------------------------------------------------------===//
 // EH Pseudo Instructions
Index: llvm/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -7761,6 +7761,18 @@
   MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
 }
 
+static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) {
+  MachineBasicBlock &MBB = *MIB->getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
+  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+  unsigned XorOp =
+      MIB->getOpcode() == X86::XOR64_FP ? X86::XOR64rr : X86::XOR32rr;
+  MIB->setDesc(TII.get(XorOp));
+  MIB.addReg(TRI->getFrameRegister(MF), RegState::Undef);
+  return true;
+}
+
 // This is used to handle spills for 128/256-bit registers when we have AVX512,
 // but not VLX. If it uses an extended register we need to use an instruction
 // that loads the lower 128/256-bit, but is available with only AVX512F.
@@ -7955,6 +7967,9 @@
   case TargetOpcode::LOAD_STACK_GUARD:
     expandLoadStackGuard(MIB, *this);
     return true;
+  case X86::XOR64_FP:
+  case X86::XOR32_FP:
+    return expandXorFP(MIB, *this);
   }
   return false;
 }
Index: llvm/test/CodeGen/X86/stack-protector-msvc.ll
===================================================================
--- llvm/test/CodeGen/X86/stack-protector-msvc.ll
+++ llvm/test/CodeGen/X86/stack-protector-msvc.ll
@@ -1,19 +1,9 @@
+; RUN: llc -mtriple=i386-pc-windows-msvc < %s -o - | FileCheck -check-prefix=MSVC-X86 %s
+; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s -o - | FileCheck -check-prefix=MSVC-X64 %s
-; RUN: llc -mtriple=i386-pc-windows-msvc < %s -o - | FileCheck -check-prefix=MSVC-I386 %s
-; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s -o - | FileCheck -check-prefix=MSVC-64 %s
-
-; MSVC-I386: movl ___security_cookie, %[[REG1:[a-z]*]]
-; MSVC-I386: movl %[[REG1]], [[SLOT:[0-9]*]](%esp)
-; MSVC-I386: calll _strcpy
-; MSVC-I386: movl [[SLOT]](%esp), %ecx
-; MSVC-I386: calll @__security_check_cookie@4
-; MSVC-I386: retl
-
-; MSVC-64: movq __security_cookie(%rip), %[[REG1:[a-z]*]]
-; MSVC-64: movq %[[REG1]], [[SLOT:[0-9]*]](%rsp)
-; MSVC-64: callq strcpy
-; MSVC-64: movq [[SLOT]](%rsp), %rcx
-; MSVC-64: callq __security_check_cookie
+
+; Make sure fastisel falls back and does something secure.
+; RUN: llc -mtriple=i686-pc-windows-msvc -O0 < %s -o - | FileCheck -check-prefix=MSVC-X86-O0 %s
+; RUN: llc -mtriple=x86_64-pc-windows-msvc -O0 < %s -o - | FileCheck -check-prefix=MSVC-X64-O0 %s
 
 @"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00"    ; <[11 x i8]*> [#uses=1]
 
@@ -21,7 +11,6 @@
 entry:
   %a_addr = alloca i8*    ; <i8**> [#uses=2]
   %buf = alloca [8 x i8]    ; <[8 x i8]*> [#uses=2]
-  %"alloca point" = bitcast i32 0 to i32    ; <i32> [#uses=0]
   store i8* %a, i8** %a_addr
   %buf1 = bitcast [8 x i8]* %buf to i8*    ; <i8*> [#uses=1]
   %0 = load i8*, i8** %a_addr, align 4    ; <i8*> [#uses=1]
@@ -34,6 +23,139 @@
   ret void
 }
 
+; MSVC-X86-LABEL: _test:
+; MSVC-X86: movl ___security_cookie, %[[REG1:[^ ]*]]
+; MSVC-X86: xorl %esp, %[[REG1]]
+; MSVC-X86: movl %[[REG1]], [[SLOT:[0-9]*]](%esp)
+; MSVC-X86: calll _strcpy
+; MSVC-X86: movl [[SLOT]](%esp), %ecx
+; MSVC-X86: xorl %esp, %ecx
+; MSVC-X86: calll @__security_check_cookie@4
+; MSVC-X86: retl
+
+; MSVC-X64-LABEL: test:
+; MSVC-X64: movq __security_cookie(%rip), %[[REG1:[^ ]*]]
+; MSVC-X64: xorq %rsp, %[[REG1]]
+; MSVC-X64: movq %[[REG1]], [[SLOT:[0-9]*]](%rsp)
+; MSVC-X64: callq strcpy
+; MSVC-X64: movq [[SLOT]](%rsp), %rcx
+; MSVC-X64: xorq %rsp, %rcx
+; MSVC-X64: callq __security_check_cookie
+; MSVC-X64: retq
+
+; MSVC-X86-O0-LABEL: _test:
+; MSVC-X86-O0: movl ___security_cookie, %[[REG1:[^ ]*]]
+; MSVC-X86-O0: xorl %esp, %[[REG1]]
+; MSVC-X86-O0: movl %[[REG1]], [[SLOT:[0-9]*]](%esp)
+; MSVC-X86-O0: calll _strcpy
+; MSVC-X86-O0: movl [[SLOT]](%esp), %[[REG1:[^ ]*]]
+; MSVC-X86-O0: xorl %esp, %[[REG1]]
+; MSVC-X86-O0: movl %[[REG1]], %ecx
+; MSVC-X86-O0: calll @__security_check_cookie@4
+; MSVC-X86-O0: retl
+
+; MSVC-X64-O0-LABEL: test:
+; MSVC-X64-O0: movq __security_cookie(%rip), %[[REG1:[^ ]*]]
+; MSVC-X64-O0: xorq %rsp, %[[REG1]]
+; MSVC-X64-O0: movq %[[REG1]], [[SLOT:[0-9]*]](%rsp)
+; MSVC-X64-O0: callq strcpy
+; MSVC-X64-O0: movq [[SLOT]](%rsp), %[[REG1:[^ ]*]]
+; MSVC-X64-O0: xorq %rsp, %[[REG1]]
+; MSVC-X64-O0: movq %[[REG1]], %rcx
+; MSVC-X64-O0: callq __security_check_cookie
+; MSVC-X64-O0: retq
+
+
+declare void @escape(i32*)
+
+define void @test_vla(i32 %n) nounwind ssp {
+  %vla = alloca i32, i32 %n
+  call void @escape(i32* %vla)
+  ret void
+}
+
+; MSVC-X86-LABEL: _test_vla:
+; MSVC-X86: pushl %ebp
+; MSVC-X86: movl %esp, %ebp
+; MSVC-X86: movl ___security_cookie, %[[REG1:[^ ]*]]
+; MSVC-X86: xorl %ebp, %[[REG1]]
+; MSVC-X86: movl %[[REG1]], [[SLOT:-[0-9]*]](%ebp)
+; MSVC-X86: calll __chkstk
+; MSVC-X86: pushl
+; MSVC-X86: calll _escape
+; MSVC-X86: movl [[SLOT]](%ebp), %ecx
+; MSVC-X86: xorl %ebp, %ecx
+; MSVC-X86: calll @__security_check_cookie@4
+; MSVC-X86: movl %ebp, %esp
+; MSVC-X86: popl %ebp
+; MSVC-X86: retl
+
+; MSVC-X64-LABEL: test_vla:
+; MSVC-X64: pushq %rbp
+; MSVC-X64: subq $16, %rsp
+; MSVC-X64: leaq 16(%rsp), %rbp
+; MSVC-X64: movq __security_cookie(%rip), %[[REG1:[^ ]*]]
+; MSVC-X64: xorq %rbp, %[[REG1]]
+; MSVC-X64: movq %[[REG1]], [[SLOT:-[0-9]*]](%rbp)
+; MSVC-X64: callq __chkstk
+; MSVC-X64: callq escape
+; MSVC-X64: movq [[SLOT]](%rbp), %rcx
+; MSVC-X64: xorq %rbp, %rcx
+; MSVC-X64: callq __security_check_cookie
+; MSVC-X64: retq
+
+
+; This case is interesting because we address local variables with RBX but XOR
+; the guard value with RBP. That's fine, either value will do, as long as they
+; are the same across the life of the frame.
+
+define void @test_vla_realign(i32 %n) nounwind ssp {
+  %realign = alloca i32, align 32
+  %vla = alloca i32, i32 %n
+  call void @escape(i32* %realign)
+  call void @escape(i32* %vla)
+  ret void
+}
+
+; MSVC-X86-LABEL: _test_vla_realign:
+; MSVC-X86: pushl %ebp
+; MSVC-X86: movl %esp, %ebp
+; MSVC-X86: pushl %esi
+; MSVC-X86: andl $-32, %esp
+; MSVC-X86: subl $32, %esp
+; MSVC-X86: movl %esp, %esi
+; MSVC-X86: movl ___security_cookie, %[[REG1:[^ ]*]]
+; MSVC-X86: xorl %ebp, %[[REG1]]
+; MSVC-X86: movl %[[REG1]], [[SLOT:[0-9]*]](%esi)
+; MSVC-X86: calll __chkstk
+; MSVC-X86: pushl
+; MSVC-X86: calll _escape
+; MSVC-X86: movl [[SLOT]](%esi), %ecx
+; MSVC-X86: xorl %ebp, %ecx
+; MSVC-X86: calll @__security_check_cookie@4
+; MSVC-X86: leal -8(%ebp), %esp
+; MSVC-X86: popl %esi
+; MSVC-X86: popl %ebp
+; MSVC-X86: retl
+
+; MSVC-X64-LABEL: test_vla_realign:
+; MSVC-X64: pushq %rbp
+; MSVC-X64: pushq %rbx
+; MSVC-X64: subq $32, %rsp
+; MSVC-X64: leaq 32(%rsp), %rbp
+; MSVC-X64: andq $-32, %rsp
+; MSVC-X64: movq %rsp, %rbx
+; MSVC-X64: movq __security_cookie(%rip), %[[REG1:[^ ]*]]
+; MSVC-X64: xorq %rbp, %[[REG1]]
+; MSVC-X64: movq %[[REG1]], [[SLOT:[0-9]*]](%rbx)
+; MSVC-X64: callq __chkstk
+; MSVC-X64: callq escape
+; MSVC-X64: movq [[SLOT]](%rbx), %rcx
+; MSVC-X64: xorq %rbp, %rcx
+; MSVC-X64: callq __security_check_cookie
+; MSVC-X64: retq
+
+
 declare i8* @strcpy(i8*, i8*) nounwind
 
 declare i32 @printf(i8*, ...) nounwind
Index: llvm/test/CodeGen/X86/stack-protector-weight.ll
===================================================================
--- llvm/test/CodeGen/X86/stack-protector-weight.ll
+++ llvm/test/CodeGen/X86/stack-protector-weight.ll
@@ -21,10 +21,11 @@
 ; MSVC-SELDAG: LD4[FixedStack0]
 ; MSVC-SELDAG: CALLpcrel32
 
+; MSVC always uses selection DAG now.
 ; MSVC-IR: # Machine code for function test_branch_weights:
 ; MSVC-IR: mem:Volatile LD4[@__security_cookie]
 ; MSVC-IR: ST4[FixedStack0]
-; MSVC-IR: LD4[%StackGuardSlot]
+; MSVC-IR: LD4[FixedStack0]
 ; MSVC-IR: CALLpcrel32
 
 define i32 @test_branch_weights(i32 %n) #0 {
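
For a target other than X86, adopting this scheme amounts to overriding the two
new hooks and supplying a post-RA pseudo for the expansion. Below is a minimal
sketch for a hypothetical target; MyTargetLowering and MyTarget::XOR_FP are
illustrative names, not part of this patch, and the X86 overrides above are the
reference implementation:

  // Sketch only: a hypothetical target opting into the XOR-FP scheme.
  // Per the TargetLowering.h contract above, getIRStackGuard() must return
  // nullptr when useStackGuardXorFP() returns true.
  bool MyTargetLowering::useStackGuardXorFP() const {
    // Unconditional here for illustration; X86 keys this off the MSVC CRT,
    // the only runtime known to mix the frame pointer into the cookie.
    return true;
  }

  SDValue MyTargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                                const SDLoc &DL) const {
    // Wrap the loaded guard value in a pseudo that XORs the frame register
    // into it. The pseudo must be expanded after register allocation (as
    // expandXorFP does for X86 above), since the frame register is not known
    // until then.
    EVT PtrTy = getPointerTy(DAG.getDataLayout());
    return SDValue(DAG.getMachineNode(MyTarget::XOR_FP, DL, PtrTy, Val), 0);
  }

Emitting a pseudo rather than a plain ISD::XOR is the key design point: the
value being XORed in is not an SDValue at all but whatever physical register
frame lowering ends up using, so the choice has to be deferred until after
register allocation.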