Index: llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp =================================================================== --- llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp +++ llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp @@ -9,7 +9,30 @@ /// Description: This pass finds Load Value Injection (LVI) gadgets consisting /// of a load from memory (i.e., SOURCE), and any operation that may transmit /// the value loaded from memory over a covert channel, or use the value loaded -/// from memory to determine a branch/call target (i.e., SINK). +/// from memory to determine a branch/call target (i.e., SINK). After finding +/// all such gadgets in a given function, the pass minimally inserts LFENCE +/// instructions in such a manner that the following property is satisfied: for +/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at +/// least one LFENCE instruction. The algorithm that implements this minimal +/// insertion is influenced by an academic paper that minimally inserts memory +/// fences for high-performance concurrent programs: +/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf +/// The algorithm implemented in this pass is as follows: +/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the +/// following components: +/// - SOURCE instructions (also includes function arguments) +/// - SINK instructions +/// - Basic block entry points +/// - Basic block terminators +/// - LFENCE instructions +/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e., +/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been +/// mitigated, go to step 6. +/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion. +/// 4. Insert one LFENCE along each CFG edge that was cut in step 3. +/// 5. Go to step 2. +/// 6. If any LFENCEs were inserted, return `true` from runOnFunction() to tell +/// LLVM that the function was modified. /// //===----------------------------------------------------------------------===// @@ -37,6 +60,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" @@ -45,11 +69,16 @@ #define PASS_KEY "x86-lvi-load" #define DEBUG_TYPE PASS_KEY +STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation"); STATISTIC(NumFunctionsConsidered, "Number of functions analyzed"); STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations " "were deployed"); STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis"); +static cl::opt OptimizePluginPath( + PASS_KEY "-opt-plugin", + cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden); + static cl::opt NoConditionalBranches( PASS_KEY "-no-cbranch", cl::desc("Don't treat conditional branches as disclosure gadgets. This " @@ -80,6 +109,12 @@ "may improve performance, at the cost of security."), cl::init(false), cl::Hidden); +static llvm::sys::DynamicLibrary OptimizeDL{}; +typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size, + unsigned int *edges, int *edge_values, + int *cut_edges /* out */, unsigned int edges_size); +static OptimizeCutT OptimizeCut = nullptr; + namespace { struct MachineGadgetGraph : ImmutableGraph { @@ -131,6 +166,11 @@ getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI, const MachineDominatorTree &MDT, const MachineDominanceFrontier &MDF, bool FixedLoads) const; + std::unique_ptr + elimEdges(std::unique_ptr Graph) const; + void cutEdges(MachineGadgetGraph &G, EdgeSet &CutEdges /* out */) const; + int insertFences(MachineFunction &MF, MachineGadgetGraph &G, + EdgeSet &CutEdges /* in, out */) const; bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const; bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const; @@ -233,14 +273,17 @@ TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); LLVM_DEBUG(dbgs() << "Hardening data-dependent loads...\n"); - hardenLoads(MF, false); + int FencesInserted = hardenLoads(MF, false); LLVM_DEBUG(dbgs() << "Hardening data-dependent loads... Done\n"); if (!NoFixedLoads) { LLVM_DEBUG(dbgs() << "Hardening fixed loads...\n"); - hardenLoads(MF, true); + FencesInserted += hardenLoads(MF, true); LLVM_DEBUG(dbgs() << "Hardening fixed loads... Done\n"); } - return false; + if (FencesInserted > 0) + ++NumFunctionsMitigated; + NumFences += FencesInserted; + return (FencesInserted > 0); } static void WriteGadgetGraph(raw_ostream &OS, MachineFunction &MF, @@ -254,6 +297,8 @@ // loads; otherwise, mitigation will be applied to non-fixed loads. int X86LoadValueInjectionLoadHardeningPass::hardenLoads(MachineFunction &MF, bool FixedLoads) const { + int FencesInserted = 0; + LLVM_DEBUG(dbgs() << "Building gadget graph...\n"); const auto &MLI = getAnalysis(); const auto &MDT = getAnalysis(); @@ -287,7 +332,27 @@ return 0; } - return 0; + do { + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n"); + std::unique_ptr ElimGraph = elimEdges(std::move(Graph)); + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n"); + if (ElimGraph->NumGadgets == 0) + break; + + EdgeSet CutEdges{*ElimGraph}; + LLVM_DEBUG(dbgs() << "Cutting edges...\n"); + cutEdges(*ElimGraph, CutEdges); + LLVM_DEBUG(dbgs() << "Cutting edges... Done\n"); + + LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n"); + FencesInserted += insertFences(MF, *ElimGraph, CutEdges); + LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n"); + + Graph = GraphBuilder::trim( + *ElimGraph, MachineGadgetGraph::NodeSet{*ElimGraph}, CutEdges); + } while (true); + + return FencesInserted; } std::unique_ptr @@ -461,6 +526,192 @@ return G; } +std::unique_ptr +X86LoadValueInjectionLoadHardeningPass::elimEdges( + std::unique_ptr Graph) const { + MachineGadgetGraph::NodeSet ElimNodes{*Graph}; + MachineGadgetGraph::EdgeSet ElimEdges{*Graph}; + + if (Graph->NumFences > 0) { // eliminate fences + for (const auto &E : Graph->edges()) { + const MachineGadgetGraph::Node *Dest = E.getDest(); + if (isFence(Dest->getValue())) { + ElimNodes.insert(*Dest); + ElimEdges.insert(E); + llvm::for_each(Dest->edges(), + [&ElimEdges](const MachineGadgetGraph::Edge &E) { + ElimEdges.insert(E); + }); + } + } + LLVM_DEBUG(dbgs() << "Eliminated " << ElimNodes.count() + << " fence nodes\n"); + } + + // Eliminate gadget edges that are mitigated. + int NumGadgets = 0; + MachineGadgetGraph::NodeSet Visited{*Graph}, GadgetSinks{*Graph}; + MachineGadgetGraph::EdgeSet ElimGadgets{*Graph}; + for (const auto &RootN : Graph->nodes()) { + // collect the gadgets for this node + for (const auto &E : RootN.edges()) { + if (MachineGadgetGraph::isGadgetEdge(E)) { + ++NumGadgets; + ElimGadgets.insert(E); + GadgetSinks.insert(*E.getDest()); + } + } + if (GadgetSinks.empty()) + continue; + std::function TraverseDFS = + [&](const MachineGadgetGraph::Node *N, bool FirstNode) { + if (!FirstNode) { + Visited.insert(*N); + if (GadgetSinks.contains(*N)) { + for (const auto &E : RootN.edges()) { + if (MachineGadgetGraph::isGadgetEdge(E) && E.getDest() == N) + ElimGadgets.erase(E); + } + } + } + for (const auto &E : N->edges()) { + const MachineGadgetGraph::Node *Dest = E.getDest(); + if (MachineGadgetGraph::isCFGEdge(E) && !Visited.contains(*Dest) && + !ElimEdges.contains(E)) + TraverseDFS(Dest, false); + } + }; + TraverseDFS(&RootN, true); + Visited.clear(); + GadgetSinks.clear(); + } + LLVM_DEBUG(dbgs() << "Eliminated " << ElimGadgets.count() + << " gadget edges\n"); + ElimEdges |= ElimGadgets; + + if (!(ElimEdges.empty() && ElimNodes.empty())) { + int NumRemainingGadgets = NumGadgets - ElimGadgets.count(); + Graph = GraphBuilder::trim(*Graph, ElimNodes, ElimEdges, 0 /* NumFences */, + NumRemainingGadgets); + } else { + Graph->NumFences = 0; + Graph->NumGadgets = NumGadgets; + } + return Graph; +} + +void X86LoadValueInjectionLoadHardeningPass::cutEdges( + MachineGadgetGraph &G, + MachineGadgetGraph::EdgeSet &CutEdges /* out */) const { + if (!OptimizePluginPath.empty()) { + if (!OptimizeDL.isValid()) { + std::string ErrorMsg{}; + OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary( + OptimizePluginPath.c_str(), &ErrorMsg); + if (!ErrorMsg.empty()) + report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"'); + OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut"); + if (!OptimizeCut) + report_fatal_error("Invalid optimization plugin"); + } + std::vector Nodes(G.nodes_size() + 1 /* terminator node */); + std::vector Edges(G.edges_size()); + std::vector EdgeCuts(G.edges_size()); + std::vector EdgeValues(G.edges_size()); + for (const auto &N : G.nodes()) { + Nodes[G.getNodeIndex(N)] = G.getEdgeIndex(*N.edges_begin()); + } + Nodes[G.nodes_size()] = G.edges_size(); // terminator node + for (const auto &E : G.edges()) { + Edges[G.getEdgeIndex(E)] = G.getNodeIndex(*E.getDest()); + EdgeValues[G.getEdgeIndex(E)] = E.getValue(); + } + OptimizeCut(Nodes.data(), G.nodes_size(), Edges.data(), EdgeValues.data(), + EdgeCuts.data(), G.edges_size()); + for (int I = 0; I < G.edges_size(); ++I) + if (EdgeCuts[I]) + CutEdges.set(I); + } else { // Use the default greedy heuristic + // Find the cheapest CFG edge that will eliminate a gadget (by being egress + // from a SOURCE node or ingress to a SINK node), and cut it. + MachineGadgetGraph::NodeSet GadgetSinks{G}; + const MachineGadgetGraph::Edge *CheapestSoFar = nullptr; + for (const auto &N : G.nodes()) { + for (const auto &E : N.edges()) { + if (MachineGadgetGraph::isGadgetEdge(E)) { + // NI is a SOURCE node. Look for a cheap egress edge + for (const auto &EE : N.edges()) { + if (MachineGadgetGraph::isCFGEdge(EE)) { + if (!CheapestSoFar || EE.getValue() < CheapestSoFar->getValue()) + CheapestSoFar = &EE; + } + } + GadgetSinks.insert(*E.getDest()); + } else { // E is a CFG edge + if (GadgetSinks.contains(*E.getDest())) { + // The dest is a SINK node. Hence EI is an ingress edge + if (!CheapestSoFar || E.getValue() < CheapestSoFar->getValue()) + CheapestSoFar = &E; + } + } + } + } + assert(CheapestSoFar && "Failed to cut an edge"); + CutEdges.insert(*CheapestSoFar); + } + LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n"); +} + +int X86LoadValueInjectionLoadHardeningPass::insertFences( + MachineFunction &MF, MachineGadgetGraph &G, + EdgeSet &CutEdges /* in, out */) const { + int FencesInserted = 0, AdditionalEdgesCut = 0; + auto CutAllCFGEdges = + [&CutEdges, &AdditionalEdgesCut](const MachineGadgetGraph::Node *N) { + for (const auto &E : N->edges()) { + if (MachineGadgetGraph::isCFGEdge(E) && !CutEdges.contains(E)) { + CutEdges.insert(E); + ++AdditionalEdgesCut; + } + } + }; + for (const auto &N : G.nodes()) { + for (const auto &E : N.edges()) { + if (CutEdges.contains(E)) { + MachineInstr *MI = N.getValue(), *Prev; + MachineBasicBlock *MBB; + MachineBasicBlock::iterator InsertionPt; + if (MI == MachineGadgetGraph::ArgNodeSentinel) { + // insert LFENCE at beginning of entry block + MBB = &MF.front(); + InsertionPt = MBB->begin(); + Prev = nullptr; + } else if (MI->isBranch()) { // insert the LFENCE before the branch + MBB = MI->getParent(); + InsertionPt = MI; + Prev = MI->getPrevNode(); + CutAllCFGEdges(&N); + } else { // insert the LFENCE after the instruction + MBB = MI->getParent(); + InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end(); + Prev = InsertionPt == MBB->end() + ? (MBB->empty() ? nullptr : &MBB->back()) + : InsertionPt->getPrevNode(); + } + if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) && + (!Prev || !isFence(Prev))) { + BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE)); + ++FencesInserted; + } + } + } + } + LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n"); + LLVM_DEBUG(dbgs() << "Cut an additional " << AdditionalEdgesCut + << " edges during fence insertion\n"); + return FencesInserted; +} + bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory( const MachineInstr &MI, unsigned Reg) const { if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE || Index: llvm/test/CodeGen/X86/lvi-hardening-loads.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/lvi-hardening-loads.ll @@ -0,0 +1,102 @@ +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-CBFX +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-fixed < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-CB +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-cbranch < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-FX +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-fixed --x86-lvi-load-no-cbranch < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-BASE + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local i32 @test(i32** %secret, i32 %secret_size) #0 { +; X64-LABEL: test: +entry: + %secret.addr = alloca i32**, align 8 + %secret_size.addr = alloca i32, align 4 + %ret_val = alloca i32, align 4 + %i = alloca i32, align 4 + store i32** %secret, i32*** %secret.addr, align 8 + store i32 %secret_size, i32* %secret_size.addr, align 4 + store i32 0, i32* %ret_val, align 4 + call void @llvm.x86.sse2.lfence() + store i32 0, i32* %i, align 4 + br label %for.cond + +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: lfence +; X64-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: jmp .LBB0_1 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %1 = load i32, i32* %secret_size.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end + +; X64: .LBB0_1: # %for.cond +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64-CBFX-NEXT: lfence +; X64-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax +; X64-CBFX-NEXT: lfence +; X64-NEXT: jge .LBB0_5 + +for.body: ; preds = %for.cond + %2 = load i32, i32* %i, align 4 + %rem = srem i32 %2, 2 + %cmp1 = icmp eq i32 %rem, 0 + br i1 %cmp1, label %if.then, label %if.end + +; X64: # %bb.2: # %for.body +; X64-NEXT: # in Loop: Header=BB0_1 Depth=1 +; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64-CBFX-NEXT: lfence +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: shrl $31, %ecx +; X64-NEXT: addl %eax, %ecx +; X64-NEXT: andl $-2, %ecx +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: jne .LBB0_4 + +if.then: ; preds = %for.body + %3 = load i32**, i32*** %secret.addr, align 8 + %4 = load i32, i32* %ret_val, align 4 + %idxprom = sext i32 %4 to i64 + %arrayidx = getelementptr inbounds i32*, i32** %3, i64 %idxprom + %5 = load i32*, i32** %arrayidx, align 8 + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %ret_val, align 4 + br label %if.end + +; X64: # %bb.3: # %if.then +; X64-NEXT: # in Loop: Header=BB0_1 Depth=1 +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; X64-CBFX-NEXT: lfence +; X64-FX-NEXT: lfence +; X64-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx +; X64-CBFX-NEXT: lfence +; X64-FX-NEXT: lfence +; X64-NEXT: movq (%rax,%rcx,8), %rax +; X64-NEXT: lfence +; X64-NEXT: movl (%rax), %eax +; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; X64-NEXT: jmp .LBB0_4 + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %7 = load i32, i32* %i, align 4 + %inc = add nsw i32 %7, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %8 = load i32, i32* %ret_val, align 4 + ret i32 %8 +} + +; Function Attrs: nounwind +declare void @llvm.x86.sse2.lfence() #1 + +attributes #0 = { "target-features"="+lvi-load-hardening" } +attributes #1 = { nounwind }