Index: llvm/lib/Target/X86/X86.h
===================================================================
--- llvm/lib/Target/X86/X86.h
+++ llvm/lib/Target/X86/X86.h
@@ -141,6 +141,7 @@
 X86RegisterBankInfo &);
 FunctionPass *createX86LoadValueInjectionLoadHardeningPass();
+FunctionPass *createX86LoadValueInjectionLoadHardeningUnoptimizedPass();
 FunctionPass *createX86LoadValueInjectionRetHardeningPass();
 FunctionPass *createX86SpeculativeLoadHardeningPass();
 FunctionPass *createX86SpeculativeExecutionSideEffectSuppression();
@@ -160,6 +161,7 @@
 void initializeX86ExpandPseudoPass(PassRegistry &);
 void initializeX86FixupSetCCPassPass(PassRegistry &);
 void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionLoadHardeningUnoptimizedPassPass(PassRegistry &);
 void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &);
 void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &);
 void initializeX86OptimizeLEAPassPass(PassRegistry &);
Index: llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
===================================================================
--- llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
+++ llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -822,3 +822,79 @@
 FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() {
   return new X86LoadValueInjectionLoadHardeningPass();
 }
+
+namespace {
+
+/// The `X86LoadValueInjectionLoadHardeningPass` above depends on expensive
+/// analysis passes that add complexity to the pipeline. This complexity can
+/// cause noticeable compile-time overhead when optimizations are disabled,
+/// i.e., at -O0. The purpose of
+/// `X86LoadValueInjectionLoadHardeningUnoptimizedPass` is to provide the same
+/// security guarantees as the optimized pass without adding unnecessary
+/// complexity to the -O0 pipeline.
+///
+/// This pass simply inserts an LFENCE after every instruction that may load.
+class X86LoadValueInjectionLoadHardeningUnoptimizedPass
+    : public MachineFunctionPass {
+public:
+  X86LoadValueInjectionLoadHardeningUnoptimizedPass()
+      : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override {
+    return "X86 Load Value Injection (LVI) Load Hardening (Unoptimized)";
+  }
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  static char ID;
+};
+
+} // end anonymous namespace
+
+char X86LoadValueInjectionLoadHardeningUnoptimizedPass::ID = 0;
+
+bool X86LoadValueInjectionLoadHardeningUnoptimizedPass::runOnMachineFunction(
+    MachineFunction &MF) {
+  LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+                    << " *****\n");
+  const X86Subtarget *STI = &MF.getSubtarget<X86Subtarget>();
+  if (!STI->useLVILoadHardening())
+    return false;
+
+  // FIXME: support 32-bit
+  if (!STI->is64Bit())
+    report_fatal_error("LVI load hardening is only supported on 64-bit", false);
+
+  // Don't skip functions with the "optnone" attribute; do honor opt-bisect.
+  const Function &F = MF.getFunction();
+  if (!F.hasOptNone() && skipFunction(F))
+    return false;
+
+  bool Modified = false;
+  ++NumFunctionsConsidered;
+
+  const TargetInstrInfo *TII = STI->getInstrInfo();
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      if (!MI.mayLoad() || MI.getOpcode() == X86::LFENCE ||
+          MI.getOpcode() == X86::MFENCE)
+        continue;
+
+      MachineBasicBlock::iterator InsertionPt =
+          MI.getNextNode() ? MI.getNextNode() : MBB.end();
+      BuildMI(MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
+      ++NumFences;
+      Modified = true;
+    }
+  }
+
+  if (Modified)
+    ++NumFunctionsMitigated;
+
+  return Modified;
+}
+
+INITIALIZE_PASS(X86LoadValueInjectionLoadHardeningUnoptimizedPass, PASS_KEY,
+                "X86 LVI load hardening", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningUnoptimizedPass() {
+  return new X86LoadValueInjectionLoadHardeningUnoptimizedPass();
+}
Index: llvm/lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- llvm/lib/Target/X86/X86TargetMachine.cpp
+++ llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -497,7 +497,10 @@
 void X86PassConfig::addPostRegAlloc() {
   addPass(createX86FloatingPointStackifierPass());
-  addPass(createX86LoadValueInjectionLoadHardeningPass());
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createX86LoadValueInjectionLoadHardeningPass());
+  else
+    addPass(createX86LoadValueInjectionLoadHardeningUnoptimizedPass());
 }
 
 void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
Index: llvm/test/CodeGen/X86/O0-pipeline.ll
===================================================================
--- llvm/test/CodeGen/X86/O0-pipeline.ll
+++ llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -46,10 +46,7 @@
 ; CHECK-NEXT: Fast Register Allocator
 ; CHECK-NEXT: Bundle Machine CFG Edges
 ; CHECK-NEXT: X86 FP Stackifier
-; CHECK-NEXT: MachineDominator Tree Construction
-; CHECK-NEXT: Machine Natural Loop Construction
-; CHECK-NEXT: Machine Dominance Frontier Construction
-; CHECK-NEXT: X86 Load Value Injection (LVI) Load Hardening
+; CHECK-NEXT: X86 Load Value Injection (LVI) Load Hardening (Unoptimized)
 ; CHECK-NEXT: Fixup Statepoint Caller Saved
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine Optimization Remark Emitter
Index: llvm/test/CodeGen/X86/lvi-hardening-loads.ll
===================================================================
--- llvm/test/CodeGen/X86/lvi-hardening-loads.ll
+++ llvm/test/CodeGen/X86/lvi-hardening-loads.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-ALL
 ; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-cbranch < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -O0 -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64-NOOPT
 
 ; Function Attrs: noinline nounwind optnone uwtable
 define dso_local i32 @test(i32** %secret, i32 %secret_size) #0 {
@@ -24,6 +25,13 @@
 ; X64-NEXT: movl $0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT: jmp .LBB0_1
 
+; X64-NOOPT: # %bb.0: # %entry
+; X64-NOOPT-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
+; X64-NOOPT-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
+; X64-NOOPT-NEXT: movl $0, -{{[0-9]+}}(%rsp)
+; X64-NOOPT-NEXT: lfence
+; X64-NOOPT-NEXT: movl $0, -{{[0-9]+}}(%rsp)
+
 for.cond: ; preds = %for.inc, %entry
   %0 = load i32, i32* %i, align 4
   %1 = load i32, i32* %secret_size.addr, align 4
@@ -38,6 +46,14 @@
 ; X64-ALL-NEXT: lfence
 ; X64-NEXT: jge .LBB0_5
 
+; X64-NOOPT: .LBB0_1: # %for.cond
+; X64-NOOPT-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOOPT-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X64-NOOPT-NEXT: lfence
+; X64-NOOPT-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax
+; X64-NOOPT-NEXT: lfence
+; X64-NOOPT-NEXT: jge .LBB0_6
+
 for.body: ; preds = %for.cond
   %2 = load i32, i32* %i, align 4
   %rem = srem i32 %2, 2
@@ -55,6 +71,16 @@
 ; X64-NEXT: cmpl %ecx, %eax
 ; X64-NEXT: jne .LBB0_4
 
+; X64-NOOPT: # %bb.2: # %for.body
+; X64-NOOPT-NEXT: # in Loop: Header=BB0_1 Depth=1
+; X64-NOOPT-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X64-NOOPT-NEXT: lfence
+; X64-NOOPT-NEXT: cltd
+; X64-NOOPT-NEXT: movl $2, %ecx
+; X64-NOOPT-NEXT: idivl %ecx
+; X64-NOOPT-NEXT: cmpl $0, %edx
+; X64-NOOPT-NEXT: jne .LBB0_4
+
 if.then: ; preds = %for.body
   %3 = load i32**, i32*** %secret.addr, align 8
   %4 = load i32, i32* %ret_val, align 4
@@ -77,6 +103,18 @@
 ; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
 ; X64-NEXT: jmp .LBB0_4
 
+; X64-NOOPT: # %bb.3: # %if.then
+; X64-NOOPT-NEXT: # in Loop: Header=BB0_1 Depth=1
+; X64-NOOPT-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X64-NOOPT-NEXT: lfence
+; X64-NOOPT-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx
+; X64-NOOPT-NEXT: lfence
+; X64-NOOPT-NEXT: movq (%rax,%rcx,8), %rax
+; X64-NOOPT-NEXT: lfence
+; X64-NOOPT-NEXT: movl (%rax), %edx
+; X64-NOOPT-NEXT: lfence
+; X64-NOOPT-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
+
 if.end: ; preds = %if.then, %for.body
   br label %for.inc
 
@@ -86,6 +124,14 @@
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
+; X64-NOOPT: .LBB0_5: # %for.inc
+; X64-NOOPT-NEXT: # in Loop: Header=BB0_1 Depth=1
+; X64-NOOPT-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X64-NOOPT-NEXT: lfence
+; X64-NOOPT-NEXT: addl $1, %eax
+; X64-NOOPT-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NOOPT-NEXT: jmp .LBB0_1
+
 for.end: ; preds = %for.cond
   %8 = load i32, i32* %ret_val, align 4
   ret i32 %8
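
Testing: a quick way to exercise both new code paths locally, sketched below
under the assumption that `llc` and `llvm-lit` were built from a tree with this
patch applied (the grep pattern is only illustrative):

  $ llvm-lit llvm/test/CodeGen/X86/O0-pipeline.ll \
        llvm/test/CodeGen/X86/lvi-hardening-loads.ll
  $ llc -mtriple=x86_64-- -O0 -debug-pass=Structure \
        < llvm/test/CodeGen/X86/O0-pipeline.ll -o /dev/null 2>&1 | grep 'LVI'

With the patch applied, the second command should list "X86 Load Value
Injection (LVI) Load Hardening (Unoptimized)" and should no longer show the
MachineDominator Tree, Machine Natural Loop, and Machine Dominance Frontier
constructions that the optimized pass pulls into the -O0 pipeline.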