Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2295,6 +2295,10 @@
   Group<m_Group>, Flags<[CoreOption,CC1Option]>;
 def mno_speculative_load_hardening : Flag<["-"], "mno-speculative-load-hardening">,
   Group<m_Group>, Flags<[CoreOption]>;
+def mlvi_cfi : Flag<["-"], "mlvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+  HelpText<"Enable only control-flow mitigations for Load Value Injection (LVI)">;
+def mno_lvi_cfi : Flag<["-"], "mno-lvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+  HelpText<"Disable control-flow mitigations for Load Value Injection (LVI)">;
 def mrelax : Flag<["-"], "mrelax">, Group<m_riscv_Features_Group>,
   HelpText<"Enable linker relaxation">;
Index: clang/lib/Driver/ToolChains/Arch/X86.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -146,6 +146,7 @@
   // flags). This is a bit hacky but keeps existing usages working. We should
   // consider deprecating this and instead warn if the user requests external
   // retpoline thunks and *doesn't* request some form of retpolines.
+  auto SpectreOpt = clang::driver::options::ID::OPT_INVALID;
   if (Args.hasArgNoClaim(options::OPT_mretpoline, options::OPT_mno_retpoline,
                          options::OPT_mspeculative_load_hardening,
                          options::OPT_mno_speculative_load_hardening)) {
@@ -153,12 +154,14 @@
                      false)) {
       Features.push_back("+retpoline-indirect-calls");
       Features.push_back("+retpoline-indirect-branches");
+      SpectreOpt = options::OPT_mretpoline;
     } else if (Args.hasFlag(options::OPT_mspeculative_load_hardening,
                             options::OPT_mno_speculative_load_hardening,
                             false)) {
       // On x86, speculative load hardening relies on at least using retpolines
       // for indirect calls.
       Features.push_back("+retpoline-indirect-calls");
+      SpectreOpt = options::OPT_mspeculative_load_hardening;
     }
   } else if (Args.hasFlag(options::OPT_mretpoline_external_thunk,
                           options::OPT_mno_retpoline_external_thunk, false)) {
@@ -166,6 +169,20 @@
     // eventually switch to an error here.
     Features.push_back("+retpoline-indirect-calls");
     Features.push_back("+retpoline-indirect-branches");
+    SpectreOpt = options::OPT_mretpoline_external_thunk;
+  }
+
+  auto LVIOpt = clang::driver::options::ID::OPT_INVALID;
+  if (Args.hasFlag(options::OPT_mlvi_cfi, options::OPT_mno_lvi_cfi, false)) {
+    Features.push_back("+lvi-cfi");
+    LVIOpt = options::OPT_mlvi_cfi;
+  }
+
+  if (SpectreOpt != clang::driver::options::ID::OPT_INVALID &&
+      LVIOpt != clang::driver::options::ID::OPT_INVALID) {
+    D.Diag(diag::err_drv_argument_not_allowed_with)
+        << D.getOpts().getOptionName(SpectreOpt)
+        << D.getOpts().getOptionName(LVIOpt);
   }
 
   // Now add any that the user explicitly requested on the command line,
Index: llvm/lib/Target/X86/CMakeLists.txt
===================================================================
--- llvm/lib/Target/X86/CMakeLists.txt
+++ llvm/lib/Target/X86/CMakeLists.txt
@@ -51,6 +51,7 @@
   X86InstrInfo.cpp
   X86EvexToVex.cpp
   X86LegalizerInfo.cpp
+  X86LoadValueInjectionIndirectThunks.cpp
   X86MCInstLower.cpp
   X86MachineFunctionInfo.cpp
   X86MacroFusion.cpp
Index: llvm/lib/Target/X86/X86.h
===================================================================
--- llvm/lib/Target/X86/X86.h
+++ llvm/lib/Target/X86/X86.h
@@ -137,6 +137,7 @@
                                                   X86Subtarget &,
                                                   X86RegisterBankInfo &);
 
+FunctionPass *createX86LoadValueInjectionIndirectThunksPass();
 FunctionPass *createX86SpeculativeLoadHardeningPass();
 
 void initializeEvexToVexInstPassPass(PassRegistry &);
@@ -152,6 +153,7 @@
 void initializeX86ExecutionDomainFixPass(PassRegistry &);
 void initializeX86ExpandPseudoPass(PassRegistry &);
 void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionIndirectThunksPassPass(PassRegistry &);
 void initializeX86OptimizeLEAPassPass(PassRegistry &);
 void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
Index: llvm/lib/Target/X86/X86.td
===================================================================
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -431,6 +431,15 @@
           "ourselves. Only has effect when combined with some other retpoline "
           "feature", [FeatureRetpolineIndirectCalls]>;
 
+// Mitigate LVI attacks against indirect calls/branches and call returns
+def FeatureLVIControlFlowIntegrity
+    : SubtargetFeature<
+          "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
+          "Prevent indirect calls/branches from using a memory operand, and "
+          "precede all indirect calls/branches from a register with an "
+          "LFENCE instruction to serialize control flow. Also decompose RET "
+          "instructions into a POP+LFENCE+JMP sequence.">;
+
 // Direct Move instructions.
 def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                                       "Support movdiri instruction">;
Index: llvm/lib/Target/X86/X86FastISel.cpp
===================================================================
--- llvm/lib/Target/X86/X86FastISel.cpp
+++ llvm/lib/Target/X86/X86FastISel.cpp
@@ -3207,8 +3207,9 @@
       (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
     return false;
 
-  // Functions using retpoline for indirect calls need to use SDISel.
-  if (Subtarget->useRetpolineIndirectCalls())
+  // Functions using retpoline/LVI thunks for indirect calls need to use SDISel.
+  if (Subtarget->useRetpolineIndirectCalls() ||
+      Subtarget->useLVIControlFlowIntegrity())
     return false;
 
   // Handle only C, fastcc, and webkit_js calling conventions for now.
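
Reviewer note, a driver usage sketch (illustrative, not part of the patch; assumes the
stock wording of err_drv_argument_not_allowed_with and the default option-name
rendering, which may differ):

  $ clang -mlvi-cfi -c foo.c                 # enables the +lvi-cfi feature
  $ clang -mretpoline -mlvi-cfi -c foo.c     # conflicting mitigations; roughly:
  clang: error: invalid argument 'mretpoline' not allowed with 'mlvi-cfi'
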
Index: llvm/lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86FrameLowering.cpp
+++ llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -961,10 +961,13 @@
                                             bool InProlog) const {
   bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
 
-  // FIXME: Add retpoline support and remove this.
+  // FIXME: Add retpoline and LVI thunk support and remove this.
   if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
     report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                        "code model and retpoline not yet implemented.");
+  if (Is64Bit && IsLargeCodeModel && STI.useLVIControlFlowIntegrity())
+    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
+                       "code model and LVI thunks not yet implemented.");
 
   unsigned CallOp;
   if (Is64Bit)
@@ -2702,10 +2705,13 @@
   // This solution is not perfect, as it assumes that the .rodata section
   // is laid out within 2^31 bytes of each function body, but this seems
   // to be sufficient for JIT.
-  // FIXME: Add retpoline support and remove the error here..
+  // FIXME: Add retpoline and LVI thunk support and remove the error here.
   if (STI.useRetpolineIndirectCalls())
     report_fatal_error("Emitting morestack calls on 64-bit with the large "
                        "code model and retpoline not yet implemented.");
+  if (STI.useLVIControlFlowIntegrity())
+    report_fatal_error("Emitting morestack calls on 64-bit with the large "
+                       "code model and LVI thunks not yet implemented.");
   BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
       .addReg(X86::RIP)
       .addImm(0)
Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1023,7 +1023,8 @@
         if (OptLevel != CodeGenOpt::None &&
             // Only do this when the target can fold the load into the call or
             // jmp.
-            !Subtarget->useRetpolineIndirectCalls() &&
+            !(Subtarget->useRetpolineIndirectCalls() ||
+              Subtarget->useLVIControlFlowIntegrity()) &&
            ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
             (N->getOpcode() == X86ISD::TC_RETURN &&
              (Subtarget->is64Bit() ||
Index: llvm/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.h
+++ llvm/lib/Target/X86/X86ISelLowering.h
@@ -1475,8 +1475,8 @@
     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;
 
-    MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
-                                            MachineBasicBlock *BB) const;
+    MachineBasicBlock *EmitLoweredThunk(MachineInstr &MI,
+                                        MachineBasicBlock *BB) const;
 
     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30563,8 +30563,10 @@
 }
 
 bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
-  // If the subtarget is using retpolines, we need to not generate jump tables.
-  if (Subtarget.useRetpolineIndirectBranches())
+  // If the subtarget is using retpolines or LVI thunks, we must not generate
+  // jump tables.
+  if (Subtarget.useRetpolineIndirectBranches() ||
+      Subtarget.useLVIControlFlowIntegrity())
     return false;
 
   // Otherwise, fallback on the generic logic.
@@ -31767,22 +31769,26 @@
   return BB;
 }
 
-static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
+static unsigned getOpcodeForThunk(unsigned RPOpc) {
   switch (RPOpc) {
   case X86::RETPOLINE_CALL32:
     return X86::CALLpcrel32;
   case X86::RETPOLINE_CALL64:
     return X86::CALL64pcrel32;
+  case X86::LVI_THUNK_CALL64:
+    return X86::CALL64pcrel32;
   case X86::RETPOLINE_TCRETURN32:
     return X86::TCRETURNdi;
   case X86::RETPOLINE_TCRETURN64:
     return X86::TCRETURNdi64;
+  case X86::LVI_THUNK_TCRETURN64:
+    return X86::TCRETURNdi64;
   }
-  llvm_unreachable("not retpoline opcode");
+  llvm_unreachable("not a retpoline or LVI thunk opcode");
 }
 
-static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
-                                      unsigned Reg) {
+static const char *getThunkSymbol(const X86Subtarget &Subtarget,
+                                  unsigned Reg) {
   if (Subtarget.useRetpolineExternalThunk()) {
     // When using an external thunk for retpolines, we pick names that match the
     // names GCC happens to use as well. This helps simplify the implementation
@@ -31817,6 +31823,12 @@
     llvm_unreachable("unexpected reg for retpoline");
   }
 
+  if (Subtarget.useLVIControlFlowIntegrity()) {
+    assert(Subtarget.is64Bit() && "LVI CFI thunks require 64-bit mode!");
+    assert(Reg == X86::R11 && "Invalid register for LVI CFI");
+    return "__x86_indirect_thunk_r11";
+  }
+
   // When targeting an internal COMDAT thunk use an LLVM-specific name.
   switch (Reg) {
   case X86::EAX:
@@ -31839,14 +31851,14 @@
 }
 
 MachineBasicBlock *
-X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
-                                        MachineBasicBlock *BB) const {
+X86TargetLowering::EmitLoweredThunk(MachineInstr &MI,
+                                    MachineBasicBlock *BB) const {
   // Copy the virtual register into the R11 physical register and
-  // call the retpoline thunk.
+  // call the retpoline or LVI thunk.
   DebugLoc DL = MI.getDebugLoc();
   const X86InstrInfo *TII = Subtarget.getInstrInfo();
   Register CalleeVReg = MI.getOperand(0).getReg();
-  unsigned Opc = getOpcodeForRetpoline(MI.getOpcode());
+  unsigned Opc = getOpcodeForThunk(MI.getOpcode());
 
   // Find an available scratch register to hold the callee. On 64-bit, we can
   // just use R11, but we scan for uses anyway to ensure we don't generate
@@ -31880,7 +31892,7 @@
     report_fatal_error("calling convention incompatible with retpoline, no "
                        "available registers");
 
-  const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
+  const char *Symbol = getThunkSymbol(Subtarget, AvailableReg);
 
   BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
       .addReg(CalleeVReg);
@@ -32658,9 +32670,11 @@
     return EmitLoweredTLSAddr(MI, BB);
   case X86::RETPOLINE_CALL32:
   case X86::RETPOLINE_CALL64:
+  case X86::LVI_THUNK_CALL64:
   case X86::RETPOLINE_TCRETURN32:
   case X86::RETPOLINE_TCRETURN64:
-    return EmitLoweredRetpoline(MI, BB);
+  case X86::LVI_THUNK_TCRETURN64:
+    return EmitLoweredThunk(MI, BB);
   case X86::CATCHRET:
     return EmitLoweredCatchRet(MI, BB);
   case X86::SEG_ALLOCA_32:
Index: llvm/lib/Target/X86/X86InstrCompiler.td
===================================================================
--- llvm/lib/Target/X86/X86InstrCompiler.td
+++ llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1199,13 +1199,19 @@
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
           (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
-          Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
+          Requires<[In64BitMode, NotUseRetpolineIndirectCalls,
+                    NotUseLVIIndirectThunks]>;
 
 // Don't fold loads into X86tcret requiring more than 6 regs.
 // There wouldn't be enough scratch registers for base+index.
 def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
           (TCRETURNmi64 addr:$dst, imm:$off)>,
-          Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
+          Requires<[In64BitMode, NotUseRetpolineIndirectCalls,
+                    NotUseLVIIndirectThunks]>;
+
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+          (LVI_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
+          Requires<[In64BitMode, UseLVIIndirectThunks]>;
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
           (RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
Index: llvm/lib/Target/X86/X86InstrControl.td
===================================================================
--- llvm/lib/Target/X86/X86InstrControl.td
+++ llvm/lib/Target/X86/X86InstrControl.td
@@ -334,11 +334,13 @@
                         Requires<[In64BitMode]>;
   def CALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
                         "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
-                      Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>;
+                      Requires<[In64BitMode,NotUseRetpolineIndirectCalls,
+                                NotUseLVIIndirectThunks]>;
   def CALL64m       : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
                         "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
                       Requires<[In64BitMode,FavorMemIndirectCall,
-                                NotUseRetpolineIndirectCalls]>;
+                                NotUseRetpolineIndirectCalls,
+                                NotUseLVIIndirectThunks]>;
 
   // Non-tracking calls for IBT, use with caution.
   let isCodeGenOnly = 1 in {
@@ -400,6 +402,9 @@
   def RETPOLINE_CALL64 :
     PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
             Requires<[In64BitMode,UseRetpolineIndirectCalls]>;
+  def LVI_THUNK_CALL64 :
+    PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
+            Requires<[In64BitMode,UseLVIIndirectThunks]>;
 
   // Retpoline variant of indirect tail calls.
   let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
@@ -408,6 +413,12 @@
     def RETPOLINE_TCRETURN32 :
       PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
   }
+
+  // LVI thunk variant of indirect tail calls.
+  let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+    def LVI_THUNK_TCRETURN64 :
+      PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
+  }
 }
 
 // Conditional tail calls are similar to the above, but they are branches
Index: llvm/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -1005,6 +1005,8 @@
 def HasMFence    : Predicate<"Subtarget->hasMFence()">;
 def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">;
 def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">;
+def UseLVIIndirectThunks : Predicate<"Subtarget->useLVIControlFlowIntegrity()">;
+def NotUseLVIIndirectThunks : Predicate<"!Subtarget->useLVIControlFlowIntegrity()">;
 
 //===----------------------------------------------------------------------===//
 // X86 Instruction Format Definitions.
Index: llvm/lib/Target/X86/X86LoadValueInjectionIndirectThunks.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/X86/X86LoadValueInjectionIndirectThunks.cpp
@@ -0,0 +1,187 @@
+//=- X86LoadValueInjectionIndirectThunks.cpp - Construct LVI thunks for x86 -=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: This pass replaces each indirect call/jump with a direct call
+/// to a thunk that looks like:
+/// ```
+/// lfence
+/// jmpq *%r11
+/// ```
+/// This ensures that if the value in register %r11 was loaded from memory,
+/// then the value in %r11 is (architecturally) correct prior to the jump.
+///
+/// Note: A lot of this code was lifted from X86RetpolineThunks.cpp.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define PASS_KEY "x86-lvi-thunks"
+#define DEBUG_TYPE PASS_KEY
+
+static const char R11ThunkName[] = "__x86_indirect_thunk_r11";
+
+namespace {
+class X86LoadValueInjectionIndirectThunksPass : public MachineFunctionPass {
+public:
+  X86LoadValueInjectionIndirectThunksPass() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override {
+    return "X86 Load Value Injection (LVI) Indirect Thunks";
+  }
+  bool doInitialization(Module &M) override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineModuleInfoWrapperPass>();
+    AU.addPreserved<MachineModuleInfoWrapperPass>();
+  }
+
+  static char ID;
+
+private:
+  MachineModuleInfo *MMI;
+  const TargetMachine *TM;
+  const X86Subtarget *STI;
+  const X86InstrInfo *TII;
+
+  bool InsertedThunks;
+
+  void createThunkFunction(Module &M, StringRef Name);
+  void populateThunk(MachineFunction &MF, unsigned Reg);
+};
+
+} // end anonymous namespace
+
+char X86LoadValueInjectionIndirectThunksPass::ID = 0;
+
+bool X86LoadValueInjectionIndirectThunksPass::doInitialization(Module &M) {
+  InsertedThunks = false;
+  return false;
+}
+
+bool X86LoadValueInjectionIndirectThunksPass::runOnMachineFunction(
+    MachineFunction &MF) {
+  LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+                    << " *****\n");
+  STI = &MF.getSubtarget<X86Subtarget>();
+  if (!STI->is64Bit())
+    return false; // FIXME: support 32-bit
+
+  // Don't skip functions with the "optnone" attribute, but do participate in
+  // opt-bisect.
+  const Function &F = MF.getFunction();
+  if (!F.hasOptNone() && skipFunction(F))
+    return false;
+
+  TM = &MF.getTarget();
+  TII = STI->getInstrInfo();
+  MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+  Module &M = const_cast<Module &>(*MMI->getModule());
+
+  // If this function is not a thunk, check to see if we need to insert
+  // a thunk.
+  if (MF.getName() != R11ThunkName) {
+    // If we've already inserted a thunk, nothing else to do.
+    if (InsertedThunks)
+      return false;
+
+    // Only add a thunk if this function's subtarget has the LVI-CFI feature
+    // enabled.
+    if (!STI->useLVIControlFlowIntegrity())
+      return false;
+
+    // Otherwise, we need to insert the thunk.
+    // WARNING: This is not really a well behaving thing to do in a function
+    // pass. We extract the module and insert a new function (and machine
+    // function) directly into the module.
+    LLVM_DEBUG(dbgs() << "Creating thunk procedure" << '\n');
+    createThunkFunction(M, R11ThunkName);
+    InsertedThunks = true;
+    return true;
+  }
+
+  LLVM_DEBUG(dbgs() << "Populating thunk" << '\n');
+  populateThunk(MF, X86::R11);
+  return true;
+}
+
+void X86LoadValueInjectionIndirectThunksPass::createThunkFunction(
+    Module &M, StringRef Name) {
+  LLVMContext &Ctx = M.getContext();
+  FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Ctx), false);
+  Function *F =
+      Function::Create(FnTy, GlobalValue::LinkOnceODRLinkage, Name, &M);
+  F->setVisibility(GlobalValue::HiddenVisibility);
+  F->setComdat(M.getOrInsertComdat(Name));
+
+  // Add Attributes so that we don't create a frame, unwind information, or
+  // inline.
+  AttrBuilder B;
+  B.addAttribute(llvm::Attribute::NoUnwind);
+  B.addAttribute(llvm::Attribute::Naked);
+  F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+
+  // Populate our function a bit so that we can verify.
+  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
+  IRBuilder<> Builder(Entry);
+
+  Builder.CreateRetVoid();
+
+  // MachineFunctions/MachineBasicBlocks aren't created automatically for the
+  // IR-level constructs we already made. Create them and insert them into the
+  // module.
+  MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
+  MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
+
+  // Insert EntryMBB into MF. It's not in the module until we do this.
+  MF.insert(MF.end(), EntryMBB);
+}
+
+void X86LoadValueInjectionIndirectThunksPass::populateThunk(
+    MachineFunction &MF, unsigned Reg) {
+  // Set MF properties. We never use vregs...
+  MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+
+  // Grab the entry MBB and erase any other blocks. O0 codegen appears to
+  // generate two bbs for the entry block.
+  MachineBasicBlock *Entry = &MF.front();
+  Entry->clear();
+  while (MF.size() > 1)
+    MF.erase(std::next(MF.begin()));
+
+  BuildMI(Entry, DebugLoc(), TII->get(X86::LFENCE));
+  BuildMI(Entry, DebugLoc(), TII->get(X86::JMP64r)).addReg(Reg);
+  Entry->addLiveIn(Reg);
+}
+
+INITIALIZE_PASS_BEGIN(X86LoadValueInjectionIndirectThunksPass, PASS_KEY,
+                      "X86 LVI indirect thunk inserter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_END(X86LoadValueInjectionIndirectThunksPass, PASS_KEY,
+                    "X86 LVI indirect thunk inserter", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionIndirectThunksPass() {
+  return new X86LoadValueInjectionIndirectThunksPass();
+}
Index: llvm/lib/Target/X86/X86MCInstLower.cpp
===================================================================
--- llvm/lib/Target/X86/X86MCInstLower.cpp
+++ llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1226,6 +1226,9 @@
     if (Subtarget->useRetpolineIndirectCalls())
       report_fatal_error("Lowering register statepoints with retpoline not "
                          "yet implemented.");
+    if (Subtarget->useLVIControlFlowIntegrity())
+      report_fatal_error("Lowering register statepoints with LVI thunks not "
+                         "yet implemented.");
     CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
     CallOpcode = X86::CALL64r;
     break;
@@ -1401,10 +1404,13 @@
     EmitAndCountInstruction(
         MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
 
-    // FIXME: Add retpoline support and remove this.
+    // FIXME: Add retpoline and LVI thunk support and remove this.
     if (Subtarget->useRetpolineIndirectCalls())
       report_fatal_error(
           "Lowering patchpoint with retpoline not yet implemented.");
+    if (Subtarget->useLVIControlFlowIntegrity())
+      report_fatal_error(
+          "Lowering patchpoint with LVI thunks not yet implemented.");
     EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
   }
Index: llvm/lib/Target/X86/X86Subtarget.h
===================================================================
--- llvm/lib/Target/X86/X86Subtarget.h
+++ llvm/lib/Target/X86/X86Subtarget.h
@@ -425,6 +425,12 @@
   /// than emitting one inside the compiler.
   bool UseRetpolineExternalThunk = false;
 
+  /// Prevent generation of indirect call/branch instructions from memory,
+  /// and force all indirect call/branch instructions from a register to be
+  /// preceded by an LFENCE. Also decompose RET instructions into a
+  /// POP+LFENCE+JMP sequence.
+  bool UseLVIControlFlowIntegrity = false;
+
   /// Use software floating point for code generation.
   bool UseSoftFloat = false;
 
@@ -713,6 +719,7 @@
   bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
   bool preferMaskRegisters() const { return PreferMaskRegisters; }
   bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
+  bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
 
   unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
   unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
@@ -860,7 +867,7 @@
-  /// If we are using retpolines, we need to expand indirectbr to avoid it
-  /// lowering to an actual indirect jump.
+  /// If we are using retpolines or LVI thunks, we need to expand indirectbr
+  /// to avoid it lowering to an actual indirect jump.
   bool enableIndirectBrExpand() const override {
-    return useRetpolineIndirectBranches();
+    return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
   }
 
   /// Enable the MachineScheduler pass for all X86 subtargets.
Index: llvm/lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- llvm/lib/Target/X86/X86TargetMachine.cpp
+++ llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -82,6 +82,7 @@
   initializeX86SpeculativeLoadHardeningPassPass(PR);
   initializeX86FlagsCopyLoweringPassPass(PR);
   initializeX86CondBrFoldingPassPass(PR);
+  initializeX86LoadValueInjectionIndirectThunksPassPass(PR);
   initializeX86OptimizeLEAPassPass(PR);
 }
 
@@ -527,6 +528,7 @@
   const MCAsmInfo *MAI = TM->getMCAsmInfo();
 
   addPass(createX86RetpolineThunksPass());
+  addPass(createX86LoadValueInjectionIndirectThunksPass());
 
   // Insert extra int3 instructions after trailing call instructions to avoid
   // issues in the unwinder.
Index: llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
===================================================================
--- llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -1253,7 +1253,8 @@
-      // Jump tables are only profitable if the retpoline mitigation is enabled.
+      // Jump tables are only profitable if a retpoline/LVI-CFI mitigation is enabled.
       Attribute FSAttr = CS.getCaller()->getFnAttribute("target-features");
       if (FSAttr.hasAttribute(Attribute::None) ||
-          !FSAttr.getValueAsString().contains("+retpoline"))
+          !(FSAttr.getValueAsString().contains("+retpoline") ||
+            FSAttr.getValueAsString().contains("+lvi-cfi")))
         continue;
 
       if (RemarksEnabled)
Index: llvm/test/CodeGen/X86/O0-pipeline.ll
===================================================================
--- llvm/test/CodeGen/X86/O0-pipeline.ll
+++ llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -73,6 +73,7 @@
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
 ; CHECK-NEXT:       X86 Retpoline Thunks
+; CHECK-NEXT:       X86 Load Value Injection (LVI) Indirect Thunks
 ; CHECK-NEXT:       Check CFA info and insert CFI instructions if needed
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
Index: llvm/test/CodeGen/X86/O3-pipeline.ll
===================================================================
--- llvm/test/CodeGen/X86/O3-pipeline.ll
+++ llvm/test/CodeGen/X86/O3-pipeline.ll
@@ -182,6 +182,7 @@
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
 ; CHECK-NEXT:       X86 Retpoline Thunks
+; CHECK-NEXT:       X86 Load Value Injection (LVI) Indirect Thunks
 ; CHECK-NEXT:       Check CFA info and insert CFI instructions if needed
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
Index: llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
@@ -0,0 +1,282 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --check-prefix=X64FAST
+;
+; Note that a lot of this code was lifted from retpoline.ll.
+
+declare void @bar(i32)
+
+; Test a simple indirect call and tail call.
+define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
+entry:
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_reg:
+; X64-DAG:   movq %rdi, %[[fp:[^ ]*]]
+; X64-DAG:   movl %esi, %[[x:[^ ]*]]
+; X64:       movl %esi, %edi
+; X64:       callq bar
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:       callq __x86_indirect_thunk_r11
+; X64:       movl %[[x]], %edi
+; X64:       callq bar
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:       jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_reg:
+; X64FAST:       callq bar
+; X64FAST:       callq __x86_indirect_thunk_r11
+; X64FAST:       callq bar
+; X64FAST:       jmp __x86_indirect_thunk_r11 # TAILCALL
+
+
+@global_fp = external global void (i32)*
+
+; Test an indirect call through a global variable.
+define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
+  %fp1 = load void (i32)*, void (i32)** @global_fp
+  call void %fp1(i32 %x)
+  %fp2 = load void (i32)*, void (i32)** @global_fp
+  tail call void %fp2(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_global_fp:
+; X64-DAG:   movl %edi, %[[x:[^ ]*]]
+; X64-DAG:   movq global_fp(%rip), %r11
+; X64:       callq __x86_indirect_thunk_r11
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq global_fp(%rip), %r11
+; X64:       jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_global_fp:
+; X64FAST:       movq global_fp(%rip), %r11
+; X64FAST:       callq __x86_indirect_thunk_r11
+; X64FAST:       movq global_fp(%rip), %r11
+; X64FAST:       jmp __x86_indirect_thunk_r11 # TAILCALL
+
+
+%struct.Foo = type { void (%struct.Foo*)** }
+
+; Test an indirect call through a vtable.
+define void @vcall(%struct.Foo* %obj) #0 {
+  %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
+  %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
+  %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
+  %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
+  tail call void %fp(%struct.Foo* %obj)
+  tail call void %fp(%struct.Foo* %obj)
+  ret void
+}
+
+; X64-LABEL: vcall:
+; X64:       movq %rdi, %[[obj:[^ ]*]]
+; X64:       movq (%rdi), %[[vptr:[^ ]*]]
+; X64:       movq 8(%[[vptr]]), %[[fp:[^ ]*]]
+; X64:       movq %[[fp]], %r11
+; X64:       callq __x86_indirect_thunk_r11
+; X64-DAG:   movq %[[obj]], %rdi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:       jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: vcall:
+; X64FAST:       callq __x86_indirect_thunk_r11
+; X64FAST:       jmp __x86_indirect_thunk_r11 # TAILCALL
+
+
+declare void @direct_callee()
+
+define void @direct_tail() #0 {
+  tail call void @direct_callee()
+  ret void
+}
+
+; X64-LABEL: direct_tail:
+; X64:       jmp direct_callee # TAILCALL
+; X64FAST-LABEL: direct_tail:
+; X64FAST:   jmp direct_callee # TAILCALL
+
+
+declare void @nonlazybind_callee() #1
+
+define void @nonlazybind_caller() #0 {
+  call void @nonlazybind_callee()
+  tail call void @nonlazybind_callee()
+  ret void
+}
+
+; X64-LABEL: nonlazybind_caller:
+; X64:       movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
+; X64:       movq %[[REG]], %r11
+; X64:       callq __x86_indirect_thunk_r11
+; X64:       movq %[[REG]], %r11
+; X64:       jmp __x86_indirect_thunk_r11 # TAILCALL
+; X64FAST-LABEL: nonlazybind_caller:
+; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST:   callq __x86_indirect_thunk_r11
+; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST:   jmp __x86_indirect_thunk_r11 # TAILCALL
+
+
+; Check that a switch doesn't get lowered using a jump table when LVI
+; thunks are enabled.
+define void @switch_jumptable(i32* %ptr, i64* %sink) #0 {
+; X64-LABEL: switch_jumptable:
+; X64-NOT:   jmpq *
+entry:
+  br label %header
+
+header:
+  %i = load volatile i32, i32* %ptr
+  switch i32 %i, label %bb0 [
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+    i32 5, label %bb5
+    i32 6, label %bb6
+    i32 7, label %bb7
+    i32 8, label %bb8
+    i32 9, label %bb9
+  ]
+
+bb0:
+  store volatile i64 0, i64* %sink
+  br label %header
+
+bb1:
+  store volatile i64 1, i64* %sink
+  br label %header
+
+bb2:
+  store volatile i64 2, i64* %sink
+  br label %header
+
+bb3:
+  store volatile i64 3, i64* %sink
+  br label %header
+
+bb4:
+  store volatile i64 4, i64* %sink
+  br label %header
+
+bb5:
+  store volatile i64 5, i64* %sink
+  br label %header
+
+bb6:
+  store volatile i64 6, i64* %sink
+  br label %header
+
+bb7:
+  store volatile i64 7, i64* %sink
+  br label %header
+
+bb8:
+  store
 volatile i64 8, i64* %sink
+  br label %header
+
+bb9:
+  store volatile i64 9, i64* %sink
+  br label %header
+}
+
+
+@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb1),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb2),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb3),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb4),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb5),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb6),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb7),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb8),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb9)]
+
+; Check that when thunks are enabled the indirectbr instruction gets
+; rewritten to use switch, and that in turn doesn't get lowered as a jump
+; table.
+define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #0 {
+; X64-LABEL: indirectbr_rewrite:
+; X64-NOT:   jmpq *
+entry:
+  %i0 = load i64, i64* %p
+  %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0
+  %target0 = load i8*, i8** %target.i0
+  indirectbr i8* %target0, [label %bb1, label %bb3]
+
+bb0:
+  store volatile i64 0, i64* %sink
+  br label %latch
+
+bb1:
+  store volatile i64 1, i64* %sink
+  br label %latch
+
+bb2:
+  store volatile i64 2, i64* %sink
+  br label %latch
+
+bb3:
+  store volatile i64 3, i64* %sink
+  br label %latch
+
+bb4:
+  store volatile i64 4, i64* %sink
+  br label %latch
+
+bb5:
+  store volatile i64 5, i64* %sink
+  br label %latch
+
+bb6:
+  store volatile i64 6, i64* %sink
+  br label %latch
+
+bb7:
+  store volatile i64 7, i64* %sink
+  br label %latch
+
+bb8:
+  store volatile i64 8, i64* %sink
+  br label %latch
+
+bb9:
+  store volatile i64 9, i64* %sink
+  br label %latch
+
+latch:
+  %i.next = load i64, i64* %p
+  %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next
+  %target.next = load i8*, i8** %target.i.next
+  ; Potentially hit a full 10 successors here so that even if we rewrite as
+  ; a switch it will try to be lowered with a jump table.
+  indirectbr i8* %target.next, [label %bb0,
+                                label %bb1,
+                                label %bb2,
+                                label %bb3,
+                                label %bb4,
+                                label %bb5,
+                                label %bb6,
+                                label %bb7,
+                                label %bb8,
+                                label %bb9]
+}
+
+; Lastly check that the necessary thunks were emitted.
+;
+; X64-LABEL: .section .text.__x86_indirect_thunk_r11,{{.*}},__x86_indirect_thunk_r11,comdat
+; X64-NEXT:  .hidden __x86_indirect_thunk_r11
+; X64-NEXT:  .weak __x86_indirect_thunk_r11
+; X64:       __x86_indirect_thunk_r11:
+; X64-NEXT:  # {{.*}}  # %entry
+; X64-NEXT:  lfence
+; X64-NEXT:  jmpq *%r11
+
+attributes #0 = { "target-features"="+lvi-cfi" }
+attributes #1 = { nonlazybind }
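
Reviewer note, a codegen sketch of the net transformation (illustrative, not part
of the patch; exact register copies depend on allocation). With +lvi-cfi, an
indirect call that would have been emitted as

  callq *%rax

is selected through the LVI_THUNK_CALL64 pseudo and lowered to

  movq  %rax, %r11
  callq __x86_indirect_thunk_r11

with a single comdat thunk emitted per module:

  __x86_indirect_thunk_r11:
          lfence
          jmpq *%r11

The LFENCE forces the load that produced %r11 to resolve architecturally before
the indirect branch dispatches, so a value injected during transient execution
cannot redirect control flow.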