Index: lib/Target/X86/CMakeLists.txt
===================================================================
--- lib/Target/X86/CMakeLists.txt
+++ lib/Target/X86/CMakeLists.txt
@@ -21,6 +21,7 @@
 add_public_tablegen_target(X86CommonTableGen)
 
 set(sources
+  ShadowCallStack.cpp
   X86AsmPrinter.cpp
   X86CallFrameOptimization.cpp
   X86CallingConv.cpp
Index: lib/Target/X86/ShadowCallStack.cpp
===================================================================
--- /dev/null
+++ lib/Target/X86/ShadowCallStack.cpp
@@ -0,0 +1,250 @@
+//===------- ShadowCallStack.cpp - Shadow Call Stack pass -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The ShadowCallStack pass instruments function prologs/epilogs to check that
+// the return address has not been corrupted during the execution of the
+// function. The return address is stored in a 'shadow call stack' addressed
+// using the %gs segment register.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeShadowCallStackPass(PassRegistry &);
+}
+
+namespace {
+
+/// Machine function pass that saves the return address to the shadow call
+/// stack in the function prolog and compares it against the on-stack return
+/// address before every return, branching to a trap block on a mismatch.
+class ShadowCallStack : public MachineFunctionPass {
+public:
+  static char ID;
+
+  ShadowCallStack() : MachineFunctionPass(ID) {
+    initializeShadowCallStackPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+  // Instruction info of the current function; set by runOnMachineFunction
+  // before any prolog/epilog code is emitted.
+  const TargetInstrInfo *TII = nullptr;
+
+  // Emits the return address save sequence at the top of the entry block.
+  void addProlog(MachineFunction &Fn);
+  // Emits the return address check in front of the return instruction MI,
+  // using both r10 and r11 as scratch registers.
+  void addEpilog(MachineBasicBlock &MBB, MachineInstr &MI,
+                 MachineBasicBlock *ud2);
+  // Longer instruction sequence that only uses r10 for when a tailcall
+  // branches to r11.
+  void addEpilogOnlyR10(MachineBasicBlock &MBB, MachineInstr &MI,
+                        MachineBasicBlock *ud2);
+};
+
+char ShadowCallStack::ID = 0;
+
+} // end anonymous namespace.
+
+// Helper function to add ModR/M references for [Seg: Reg + Offset] memory
+// accesses. The scale is fixed to 1 and no index register is used.
+static inline const MachineInstrBuilder &
+addSegmentedMem(const MachineInstrBuilder &MIB, unsigned Seg, unsigned Reg,
+                int Offset = 0) {
+  return MIB.addReg(Reg).addImm(1).addReg(0).addImm(Offset).addReg(Seg);
+}
+
+void ShadowCallStack::addProlog(MachineFunction &Fn) {
+  // Use the entry block, i.e. the same block runOnMachineFunction inspected
+  // for live-in registers.
+  MachineBasicBlock &MBB = Fn.front();
+
+  // Borrow the debug location of the first instruction following the prolog.
+  // An empty entry block has none, so look at its fall-through block; if that
+  // block is missing or empty as well, keep the default (unknown) location
+  // rather than dereferencing an invalid block or instruction.
+  const MachineBasicBlock *NonEmpty = &MBB;
+  if (MBB.empty())
+    NonEmpty = MBB.getFallThrough();
+  DebugLoc DL;
+  if (NonEmpty && !NonEmpty->empty())
+    DL = NonEmpty->front().getDebugLoc();
+
+  const unsigned PCReg = X86::R10;
+  const unsigned OffsetReg = X86::R11;
+
+  // All instructions are inserted in front of this iterator. It is end() for
+  // an empty block, so empty and non-empty entry blocks share the same code
+  // and no placeholder instruction is needed.
+  const MachineBasicBlock::iterator MBBI = MBB.begin();
+
+  // mov r10, [rsp]
+  addDirectMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(PCReg),
+               X86::RSP);
+  // xor r11, r11
+  BuildMI(MBB, MBBI, DL, TII->get(X86::XOR64rr))
+      .addDef(OffsetReg)
+      .addReg(OffsetReg, RegState::Undef)
+      .addReg(OffsetReg, RegState::Undef);
+  // add QWORD [gs:r11], 8
+  addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::ADD64mi8)), X86::GS,
+                  OffsetReg)
+      .addImm(8);
+  // mov r11, [gs:r11]
+  addSegmentedMem(
+      BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(OffsetReg), X86::GS,
+      OffsetReg);
+  // mov [gs:r11], r10
+  addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64mr)), X86::GS,
+                  OffsetReg)
+      .addReg(PCReg);
+}
+
+void ShadowCallStack::addEpilog(MachineBasicBlock &MBB, MachineInstr &MI,
+                                MachineBasicBlock *ud2) {
+  const DebugLoc &DL = MI.getDebugLoc();
+
+  // xor r11, r11
+  BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr))
+      .addDef(X86::R11)
+      .addReg(X86::R11, RegState::Undef)
+      .addReg(X86::R11, RegState::Undef);
+  // mov r10, [gs:r11]
+  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
+                  X86::GS, X86::R11);
+  // mov r10, [gs:r10]
+  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
+                  X86::GS, X86::R10);
+  // sub QWORD [gs:r11], 8
+  // This instruction should not be moved up to avoid a signal race.
+  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)),
+                  X86::GS, X86::R11)
+      .addImm(8);
+  // cmp [rsp], r10
+  addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
+      .addReg(X86::R10);
+  // jne trap
+  BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(ud2);
+  MBB.addSuccessor(ud2);
+}
+
+void ShadowCallStack::addEpilogOnlyR10(MachineBasicBlock &MBB, MachineInstr &MI,
+                                       MachineBasicBlock *ud2) {
+  const DebugLoc &DL = MI.getDebugLoc();
+
+  // xor r10, r10
+  BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr))
+      .addDef(X86::R10)
+      .addReg(X86::R10, RegState::Undef)
+      .addReg(X86::R10, RegState::Undef);
+  // mov r10, [gs:r10]
+  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
+                  X86::GS, X86::R10);
+  // mov r10, [gs:r10]
+  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
+                  X86::GS, X86::R10);
+  // sub QWORD [gs:0], 8
+  // This instruction should not be moved up to avoid a signal race.
+  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)), X86::GS, 0)
+      .addImm(8);
+  // cmp [rsp], r10
+  addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
+      .addReg(X86::R10);
+  // jne trap
+  BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(ud2);
+  MBB.addSuccessor(ud2);
+}
+
+bool ShadowCallStack::runOnMachineFunction(MachineFunction &Fn) {
+  if (!Fn.getFunction().hasFnAttribute(Attribute::ShadowCallStack) ||
+      Fn.getFunction().hasFnAttribute(Attribute::Naked))
+    return false;
+
+  // FIXME: Skip functions that have r10 or r11 live on entry (r10 can be live
+  // on entry for parameters with the nest attribute.)
+  if (Fn.empty() || Fn.front().isLiveIn(X86::R10) ||
+      Fn.front().isLiveIn(X86::R11))
+    return false;
+
+  // FIXME: Skip functions with conditional and r10 tail calls for now.
+  for (auto &MBB : Fn) {
+    if (MBB.empty())
+      continue;
+
+    const MachineInstr &MI = MBB.instr_back();
+    if (MI.isReturn() && MI.isCall()) {
+      if (MI.findRegisterUseOperand(X86::EFLAGS))
+        return false;
+      // This should only be possible on Windows 64 (see GR64_TC versus
+      // GR64_TCW64.)
+      if (MI.findRegisterUseOperand(X86::R10) ||
+          MI.hasRegisterImplicitUseOperand(X86::R10))
+        return false;
+    }
+  }
+
+  TII = Fn.getSubtarget().getInstrInfo();
+
+  MachineBasicBlock *Trap = nullptr;
+  for (auto &MBB : Fn) {
+    // The trap block is appended to Fn while this loop is running; it never
+    // needs an epilog of its own.
+    if (&MBB == Trap || MBB.empty())
+      continue;
+
+    MachineInstr &MI = MBB.instr_back();
+    if (MI.isReturn()) {
+      if (!Trap) {
+        Trap = Fn.CreateMachineBasicBlock();
+        BuildMI(Trap, MI.getDebugLoc(), TII->get(X86::TRAP));
+        Fn.push_back(Trap);
+      }
+
+      if (MI.findRegisterUseOperand(X86::R11))
+        addEpilogOnlyR10(MBB, MI, Trap);
+      else
+        addEpilog(MBB, MI, Trap);
+    }
+  }
+
+  if (Trap)
+    addProlog(Fn);
+
+  return Trap != nullptr;
+}
+
+INITIALIZE_PASS(ShadowCallStack, "shadow-call-stack", "Shadow Call Stack",
+                false, false)
+
+FunctionPass *llvm::createShadowCallStackPass() {
+  return new ShadowCallStack();
+}
Index: lib/Target/X86/X86.h
===================================================================
--- lib/Target/X86/X86.h
+++ lib/Target/X86/X86.h
@@ -50,6 +50,11 @@
 /// transition penalty between functions encoded with AVX and SSE.
 FunctionPass *createX86IssueVZeroUpperPass();
 
+/// This pass instruments the function prolog to save the return address to a
+/// 'shadow call stack' and the function epilog to check that the return address
+/// did not change during function execution.
+FunctionPass *createShadowCallStackPass();
+
 /// This pass inserts ENDBR instructions before indirect jump/call
 /// destinations as part of CET IBT mechanism.
FunctionPass *createX86IndirectBranchTrackingPass();
Index: lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- lib/Target/X86/X86TargetMachine.cpp
+++ lib/Target/X86/X86TargetMachine.cpp
@@ -58,6 +58,7 @@
 
 void initializeWinEHStatePassPass(PassRegistry &);
 void initializeFixupLEAPassPass(PassRegistry &);
+void initializeShadowCallStackPass(PassRegistry &);
 void initializeX86CallFrameOptimizationPass(PassRegistry &);
 void initializeX86CmovConverterPassPass(PassRegistry &);
 void initializeX86ExecutionDomainFixPass(PassRegistry &);
@@ -76,6 +77,7 @@
   initializeFixupBWInstPassPass(PR);
   initializeEvexToVexInstPassPass(PR);
   initializeFixupLEAPassPass(PR);
+  initializeShadowCallStackPass(PR);
   initializeX86CallFrameOptimizationPass(PR);
   initializeX86CmovConverterPassPass(PR);
   initializeX86ExecutionDomainFixPass(PR);
@@ -470,6 +472,7 @@
     addPass(createBreakFalseDeps());
   }
 
+  addPass(createShadowCallStackPass());
   addPass(createX86IndirectBranchTrackingPass());
 
   if (UseVZeroUpper)
Index: test/CodeGen/X86/O0-pipeline.ll
===================================================================
--- test/CodeGen/X86/O0-pipeline.ll
+++ test/CodeGen/X86/O0-pipeline.ll
@@ -49,6 +49,7 @@
 ; CHECK-NEXT:       Post-RA pseudo instruction expansion pass
 ; CHECK-NEXT:       X86 pseudo instruction expansion pass
 ; CHECK-NEXT:       Analyze Machine Code For Garbage Collection
+; CHECK-NEXT:       Shadow Call Stack
 ; CHECK-NEXT:       X86 Indirect Branch Tracking
 ; CHECK-NEXT:       X86 vzeroupper inserter
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
Index: test/CodeGen/X86/shadow-call-stack.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/shadow-call-stack.mir
@@ -0,0 +1,125 @@
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass shadow-call-stack -verify-machineinstrs -o - %s | FileCheck %s
+--- |
+
+  define void @no_return() #0 { ret void }
+  define void @normal_return() #0 { ret void }
+  define void @r10_live_in() #0 { ret void }
+  define void @normal_tail_call() #0 { ret void }
+  define void @r11_tail_call() #0 { ret void }
+  define void @conditional_tail_call() #0 { ret void }
+
+  attributes #0 = { shadowcallstack }
+
+...
+---
+# CHECK-LABEL: name: no_return
+name: no_return
+body: |
+  bb.0:
+    ; CHECK: bb.0:
+    ; CHECK-NEXT: $eax = MOV32ri 13
+    $eax = MOV32ri 13
+...
+---
+# CHECK-LABEL: name: normal_return
+name: normal_return
+body: |
+  bb.0:
+    ; CHECK: bb.0:
+    ; CHECK: $r10 = MOV64rm $rsp, 1, $noreg, 0, $noreg
+    ; CHECK-NEXT: $r11 = XOR64rr undef $r11, undef $r11, implicit-def $eflags
+    ; CHECK-NEXT: ADD64mi8 $r11, 1, $noreg, 0, $gs, 8, implicit-def $eflags
+    ; CHECK-NEXT: $r11 = MOV64rm $r11, 1, $noreg, 0, $gs
+    ; CHECK-NEXT: MOV64mr $r11, 1, $noreg, 0, $gs, $r10
+    ; CHECK-NEXT: $eax = MOV32ri 13
+    $eax = MOV32ri 13
+
+    ; CHECK-NEXT: $r11 = XOR64rr undef $r11, undef $r11, implicit-def $eflags
+    ; CHECK-NEXT: $r10 = MOV64rm $r11, 1, $noreg, 0, $gs
+    ; CHECK-NEXT: $r10 = MOV64rm $r10, 1, $noreg, 0, $gs
+    ; CHECK-NEXT: SUB64mi8 $r11, 1, $noreg, 0, $gs, 8, implicit-def $eflags
+    ; CHECK-NEXT: CMP64mr $rsp, 1, $noreg, 0, $noreg, $r10, implicit-def $eflags
+    ; CHECK-NEXT: JNE_1 %bb.1, implicit $eflags
+    ; CHECK-NEXT: RETQ $eax
+    RETQ $eax
+
+    ; CHECK: bb.1:
+    ; CHECK-NEXT: TRAP
+...
+---
+# CHECK-LABEL: name: r10_live_in
+name: r10_live_in
+body: |
+  bb.0:
+    liveins: $r10
+
+    ; CHECK: bb.0:
+    ; CHECK-NEXT: $eax = MOV32ri 13
+    $eax = MOV32ri 13
+    ; CHECK-NEXT: RETQ $eax
+    RETQ $eax
+...
+---
+# CHECK-LABEL: name: normal_tail_call
+name: normal_tail_call
+body: |
+  bb.0:
+    ; CHECK: bb.0:
+    ; CHECK: $r10 = MOV64rm $rsp, 1, $noreg, 0, $noreg
+    ; CHECK-NEXT: $r11 = XOR64rr undef $r11, undef $r11, implicit-def $eflags
+    ; CHECK-NEXT: ADD64mi8 $r11, 1, $noreg, 0, $gs, 8, implicit-def $eflags
+    ; CHECK-NEXT: $r11 = MOV64rm $r11, 1, $noreg, 0, $gs
+    ; CHECK-NEXT: MOV64mr $r11, 1, $noreg, 0, $gs, $r10
+    ; CHECK-NEXT: $eax = MOV32ri 13
+    $eax = MOV32ri 13
+
+    ; CHECK-NEXT: $r11 = XOR64rr undef $r11, undef $r11, implicit-def $eflags
+    ; CHECK-NEXT: $r10 = MOV64rm $r11, 1, $noreg, 0, $gs
+    ; CHECK-NEXT: $r10 = MOV64rm $r10, 1, $noreg, 0, $gs
+    ; CHECK-NEXT: SUB64mi8 $r11, 1, $noreg, 0, $gs, 8, implicit-def $eflags
+    ; CHECK-NEXT: CMP64mr $rsp, 1, $noreg, 0, $noreg, $r10, implicit-def $eflags
+    ; CHECK-NEXT: JNE_1 %bb.1, implicit $eflags
+    ; CHECK-NEXT: TAILJMPr64 $rax
+    TAILJMPr64 $rax
+
+    ; CHECK: bb.1:
+    ; CHECK-NEXT: TRAP
+...
+---
+# CHECK-LABEL: name: r11_tail_call
+name: r11_tail_call
+body: |
+  bb.0:
+    ; CHECK: bb.0:
+    ; CHECK: $r10 = MOV64rm $rsp, 1, $noreg, 0, $noreg
+    ; CHECK-NEXT: $r11 = XOR64rr undef $r11, undef $r11, implicit-def $eflags
+    ; CHECK-NEXT: ADD64mi8 $r11, 1, $noreg, 0, $gs, 8, implicit-def $eflags
+    ; CHECK-NEXT: $r11 = MOV64rm $r11, 1, $noreg, 0, $gs
+    ; CHECK-NEXT: MOV64mr $r11, 1, $noreg, 0, $gs, $r10
+    ; CHECK-NEXT: $eax = MOV32ri 13
+    $eax = MOV32ri 13
+
+    ; CHECK-NEXT: $r10 = XOR64rr undef $r10, undef $r10, implicit-def $eflags
+    ; CHECK-NEXT: $r10 = MOV64rm $r10, 1, $noreg, 0, $gs
+    ; CHECK-NEXT: $r10 = MOV64rm $r10, 1, $noreg, 0, $gs
+    ; CHECK-NEXT: SUB64mi8 $noreg, 1, $noreg, 0, $gs, 8, implicit-def $eflags
+    ; CHECK-NEXT: CMP64mr $rsp, 1, $noreg, 0, $noreg, $r10, implicit-def $eflags
+    ; CHECK-NEXT: JNE_1 %bb.1, implicit $eflags
+    ; CHECK-NEXT: TAILJMPr64 $r11
+    TAILJMPr64 $r11
+
+    ; CHECK: bb.1:
+    ; CHECK-NEXT: TRAP
+...
+---
+# CHECK-LABEL: name: conditional_tail_call
+name: conditional_tail_call
+body: |
+  bb.0:
+    ; CHECK: bb.0:
+    ; CHECK-NEXT: $eax = MOV32ri 13
+    $eax = MOV32ri 13
+
+    ; CHECK-NEXT: TAILJMPd64_CC @conditional_tail_call, $eflags
+    TAILJMPd64_CC @conditional_tail_call, $eflags
+...