Index: llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp =================================================================== --- llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp +++ llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp @@ -9,7 +9,30 @@ /// Description: This pass finds Load Value Injection (LVI) gadgets consisting /// of a load from memory (i.e., SOURCE), and any operation that may transmit /// the value loaded from memory over a covert channel, or use the value loaded -/// from memory to determine a branch/call target (i.e., SINK). +/// from memory to determine a branch/call target (i.e., SINK). After finding +/// all such gadgets in a given function, the pass minimally inserts LFENCE +/// instructions in such a manner that the following property is satisfied: for +/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at +/// least one LFENCE instruction. The algorithm that implements this minimal +/// insertion is influenced by an academic paper that minimally inserts memory +/// fences for high-performance concurrent programs: +/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf +/// The algorithm implemented in this pass is as follows: +/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the +/// following components: +/// - SOURCE instructions (also includes function arguments) +/// - SINK instructions +/// - Basic block entry points +/// - Basic block terminators +/// - LFENCE instructions +/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e., +/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been +/// mitigated, go to step 6. +/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion. +/// 4. Insert one LFENCE along each CFG edge that was cut in step 3. +/// 5. Go to step 2. +/// 6. If any LFENCEs were inserted, return `true` from runOnFunction() to tell +/// LLVM that the function was modified. /// /// For performance purposes, this pass uses a custom data structure to /// implement the GadgetGraph: ImmutableGraph. As the name suggests, an @@ -53,6 +76,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" @@ -61,11 +85,16 @@ #define PASS_KEY "x86-lvi-load" #define DEBUG_TYPE PASS_KEY +STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation"); STATISTIC(NumFunctionsConsidered, "Number of functions analyzed"); STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations " "were deployed"); STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis"); +static cl::opt OptimizePluginPath( + PASS_KEY "-opt-plugin", + cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden); + static cl::opt NoConditionalBranches( PASS_KEY "-no-cbranch", cl::desc("Don't treat conditional branches as disclosure gadgets. This " @@ -96,6 +125,12 @@ "may improve performance, at the cost of security."), cl::init(false), cl::Hidden); +static llvm::sys::DynamicLibrary OptimizeDL{}; +typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size, + unsigned int *edges, int *edge_values, + int *cut_edges /* out */, unsigned int edges_size); +static OptimizeCutT OptimizeCut = nullptr; + #define ARG_NODE nullptr #define GADGET_EDGE ((int)(-1)) #define WEIGHT(EdgeValue) ((double)(2 * (EdgeValue) + 1)) @@ -155,6 +190,11 @@ getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI, const MachineDominatorTree &MDT, const MachineDominanceFrontier &MDF, bool FixedLoads) const; + std::unique_ptr + elimEdges(std::unique_ptr Graph) const; + void cutEdges(MachineGadgetGraph &G, EdgeSet &CutEdges /* out */) const; + int insertFences(MachineGadgetGraph &G, + EdgeSet &CutEdges /* in, out */) const; bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const; bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const; @@ -257,14 +297,17 @@ TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); LLVM_DEBUG(dbgs() << "Hardening data-dependent loads...\n"); - hardenLoads(MF, false); + int FencesInserted = hardenLoads(MF, false); LLVM_DEBUG(dbgs() << "Hardening data-dependent loads... Done\n"); if (!NoFixedLoads) { LLVM_DEBUG(dbgs() << "Hardening fixed loads...\n"); hardenLoads(MF, true); LLVM_DEBUG(dbgs() << "Hardening fixed loads... Done\n"); } - return false; + if (FencesInserted > 0) + ++NumFunctionsMitigated; + NumFences += FencesInserted; + return (FencesInserted > 0); } // Apply the mitigation to `MF`, return the number of fences inserted. @@ -272,6 +315,8 @@ // and non-fixed loads; otherwise, only non-fixed loads. int X86LoadValueInjectionLoadHardeningPass::hardenLoads(MachineFunction &MF, bool FixedLoads) const { + int FencesInserted = 0; + LLVM_DEBUG(dbgs() << "Building gadget graph...\n"); const auto &MLI = getAnalysis(); const auto &MDT = getAnalysis(); @@ -305,7 +350,27 @@ return 0; } - return 0; + do { + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n"); + std::unique_ptr ElimGraph = elimEdges(std::move(Graph)); + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n"); + if (ElimGraph->NumGadgets == 0) + break; + + EdgeSet CutEdges{*ElimGraph}; + LLVM_DEBUG(dbgs() << "Cutting edges...\n"); + cutEdges(*ElimGraph, CutEdges); + LLVM_DEBUG(dbgs() << "Cutting edges... Done\n"); + + LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n"); + FencesInserted += insertFences(*ElimGraph, CutEdges); + LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n"); + + Graph.reset(GraphBuilder::trim( + *ElimGraph, MachineGadgetGraph::NodeSet{*ElimGraph}, CutEdges)); + } while (true); + + return FencesInserted; } std::unique_ptr @@ -477,6 +542,213 @@ return G; } +std::unique_ptr +X86LoadValueInjectionLoadHardeningPass::elimEdges( + std::unique_ptr Graph) const { + MachineGadgetGraph::NodeSet ElimNodes{*Graph}; + MachineGadgetGraph::EdgeSet ElimEdges{*Graph}; + + if (Graph->NumFences > 0) { // eliminate fences + for (auto EI = Graph->edges_begin(), EE = Graph->edges_end(); EI != EE; + ++EI) { + GTraits::NodeRef Dest = GTraits::edge_dest(*EI); + if (isFence(Dest->value())) { + ElimNodes.insert(Dest); + ElimEdges.insert(EI); + std::for_each( + GTraits::child_edge_begin(Dest), GTraits::child_edge_end(Dest), + [&ElimEdges](GTraits::EdgeRef E) { ElimEdges.insert(&E); }); + } + } + LLVM_DEBUG(dbgs() << "Eliminated " << ElimNodes.count() + << " fence nodes\n"); + } + + // eliminate gadget edges that are mitigated + int NumGadgets = 0; + MachineGadgetGraph::NodeSet Visited{*Graph}, GadgetSinks{*Graph}; + MachineGadgetGraph::EdgeSet ElimGadgets{*Graph}; + for (auto NI = GTraits::nodes_begin(Graph.get()), + NE = GTraits::nodes_end(Graph.get()); + NI != NE; ++NI) { + // collect the gadgets for this node + for (auto EI = GTraits::child_edge_begin(*NI), + EE = GTraits::child_edge_end(*NI); + EI != EE; ++EI) { + if (MachineGadgetGraph::isGadgetEdge(*EI)) { + ++NumGadgets; + ElimGadgets.insert(EI); + GadgetSinks.insert(GTraits::edge_dest(*EI)); + } + } + if (GadgetSinks.empty()) + continue; + std::function TraverseDFS = + [&](GTraits::NodeRef N, bool FirstNode) { + if (!FirstNode) { + Visited.insert(N); + if (GadgetSinks.contains(N)) { + for (auto CEI = GTraits::child_edge_begin(*NI), + CEE = GTraits::child_edge_end(*NI); + CEI != CEE; ++CEI) { + if (MachineGadgetGraph::isGadgetEdge(*CEI) && + GTraits::edge_dest(*CEI) == N) + ElimGadgets.erase(CEI); + } + } + } + for (auto CEI = GTraits::child_edge_begin(N), + CEE = GTraits::child_edge_end(N); + CEI != CEE; ++CEI) { + GTraits::NodeRef Dest = GTraits::edge_dest(*CEI); + if (MachineGadgetGraph::isCFGEdge(*CEI) && + !Visited.contains(Dest) && !ElimEdges.contains(CEI)) + TraverseDFS(Dest, false); + } + }; + TraverseDFS(*NI, true); + Visited.clear(); + GadgetSinks.clear(); + } + LLVM_DEBUG(dbgs() << "Eliminated " << ElimGadgets.count() + << " gadget edges\n"); + ElimEdges |= ElimGadgets; + + if (!(ElimEdges.empty() && ElimNodes.empty())) { + int NumRemainingGadgets = NumGadgets - ElimGadgets.count(); + Graph.reset(GraphBuilder::trim(*Graph, ElimNodes, ElimEdges, + 0 /* NumFences */, NumRemainingGadgets)); + } else { + Graph->NumFences = 0; + Graph->NumGadgets = NumGadgets; + } + return Graph; +} + +void X86LoadValueInjectionLoadHardeningPass::cutEdges( + MachineGadgetGraph &G, + MachineGadgetGraph::EdgeSet &CutEdges /* out */) const { + if (!OptimizePluginPath.empty()) { + if (!OptimizeDL.isValid()) { + std::string ErrorMsg{}; + OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary( + OptimizePluginPath.c_str(), &ErrorMsg); + if (!ErrorMsg.empty()) + report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"'); + OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut"); + if (!OptimizeCut) + report_fatal_error("Invalid optimization plugin"); + } + auto *Nodes = new unsigned int[G.nodes_size() + 1 /* terminator node */]; + auto *Edges = new unsigned int[G.edges_size()]; + auto *EdgeCuts = new int[G.edges_size()]; + auto *EdgeValues = new int[G.edges_size()]; + for (auto *NI = G.nodes_begin(), *NE = G.nodes_end(); NI != NE; ++NI) { + Nodes[std::distance(G.nodes_begin(), NI)] = + std::distance(G.edges_begin(), GTraits::child_edge_begin(NI)); + } + Nodes[G.nodes_size()] = G.edges_size(); // terminator node + for (auto *EI = G.edges_begin(), *EE = G.edges_end(); EI != EE; ++EI) { + Edges[std::distance(G.edges_begin(), EI)] = + std::distance(G.nodes_begin(), GTraits::edge_dest(*EI)); + EdgeValues[std::distance(G.edges_begin(), EI)] = EI->value(); + } + OptimizeCut(Nodes, G.nodes_size(), Edges, EdgeValues, EdgeCuts, + G.edges_size()); + for (int I = 0; I < G.edges_size(); ++I) { + if (EdgeCuts[I]) + CutEdges.set(I); + } + delete[] Nodes; + delete[] Edges; + delete[] EdgeCuts; + delete[] EdgeValues; + } else { // Use the default greedy heuristic + // Find the cheapest CFG edge that will eliminate a gadget (by being egress + // from a SOURCE node or ingress to a SINK node), and cut it. + MachineGadgetGraph::NodeSet GadgetSinks{G}; + MachineGadgetGraph::Edge *CheapestSoFar = nullptr; + for (auto NI = GTraits::nodes_begin(&G), NE = GTraits::nodes_end(&G); + NI != NE; ++NI) { + for (auto EI = GTraits::child_edge_begin(*NI), + EE = GTraits::child_edge_end(*NI); + EI != EE; ++EI) { + if (MachineGadgetGraph::isGadgetEdge(*EI)) { + // NI is a SOURCE node. Look for a cheap egress edge + for (auto EEI = GTraits::child_edge_begin(*NI); EEI != EE; ++EEI) { + if (MachineGadgetGraph::isCFGEdge(*EEI)) { + if (!CheapestSoFar || EEI->value() < CheapestSoFar->value()) + CheapestSoFar = EEI; + } + } + GadgetSinks.insert(GTraits::edge_dest(*EI)); + } else { // EI is a CFG edge + if (GadgetSinks.contains(GTraits::edge_dest(*EI))) { + // The dest is a SINK node. Hence EI is an ingress edge + if (!CheapestSoFar || EI->value() < CheapestSoFar->value()) + CheapestSoFar = EI; + } + } + } + } + assert(CheapestSoFar && "Failed to cut an edge"); + CutEdges.insert(CheapestSoFar); + } + LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n"); +} + +int X86LoadValueInjectionLoadHardeningPass::insertFences( + MachineGadgetGraph &G, EdgeSet &CutEdges /* in, out */) const { + int FencesInserted = 0, AdditionalEdgesCut = 0; + auto CutAllCFGEdges = [&CutEdges, &AdditionalEdgesCut](GTraits::NodeRef N) { + for (auto CEI = GTraits::child_edge_begin(N), + CEE = GTraits::child_edge_end(N); + CEI != CEE; ++CEI) { + if (MachineGadgetGraph::isCFGEdge(*CEI) && !CutEdges.contains(CEI)) { + CutEdges.insert(CEI); + ++AdditionalEdgesCut; + } + } + }; + for (auto NI = GTraits::nodes_begin(&G), NE = GTraits::nodes_end(&G); + NI != NE; ++NI) { + for (auto CEI = GTraits::child_edge_begin(*NI), + CEE = GTraits::child_edge_end(*NI); + CEI != CEE; ++CEI) { + if (CutEdges.contains(CEI)) { + MachineInstr *MI = (*NI)->value(), *Prev; + MachineBasicBlock *MBB; + MachineBasicBlock::iterator InsertionPt; + if (MI == ARG_NODE) { // insert LFENCE at beginning of entry block + MBB = &G.getMF().front(); + InsertionPt = MBB->begin(); + Prev = nullptr; + } else if (MI->isBranch()) { // insert the LFENCE before the branch + MBB = MI->getParent(); + InsertionPt = MI; + Prev = MI->getPrevNode(); + CutAllCFGEdges(*NI); + } else { // insert the LFENCE after the instruction + MBB = MI->getParent(); + InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end(); + Prev = InsertionPt == MBB->end() + ? (MBB->empty() ? nullptr : &MBB->back()) + : InsertionPt->getPrevNode(); + } + if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) && + (!Prev || !isFence(Prev))) { + BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE)); + ++FencesInserted; + } + } + } + } + LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n"); + LLVM_DEBUG(dbgs() << "Cut an additional " << AdditionalEdgesCut + << " edges during fence insertion\n"); + return FencesInserted; +} + bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory( const MachineInstr &MI, unsigned Reg) const { if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE || Index: llvm/test/CodeGen/X86/lvi-hardening-loads.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/lvi-hardening-loads.ll @@ -0,0 +1,102 @@ +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-CBFX +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-fixed < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-CB +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-cbranch < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-FX +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-fixed --x86-lvi-load-no-cbranch < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-BASE + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local i32 @test(i32** %secret, i32 %secret_size) #0 { +; X64-LABEL: test: +entry: + %secret.addr = alloca i32**, align 8 + %secret_size.addr = alloca i32, align 4 + %ret_val = alloca i32, align 4 + %i = alloca i32, align 4 + store i32** %secret, i32*** %secret.addr, align 8 + store i32 %secret_size, i32* %secret_size.addr, align 4 + store i32 0, i32* %ret_val, align 4 + call void @llvm.x86.sse2.lfence() + store i32 0, i32* %i, align 4 + br label %for.cond + +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: lfence +; X64-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: jmp .LBB0_1 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %1 = load i32, i32* %secret_size.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end + +; X64: .LBB0_1: # %for.cond +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64-CBFX-NEXT: lfence +; X64-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax +; X64-CBFX-NEXT: lfence +; X64-NEXT: jge .LBB0_5 + +for.body: ; preds = %for.cond + %2 = load i32, i32* %i, align 4 + %rem = srem i32 %2, 2 + %cmp1 = icmp eq i32 %rem, 0 + br i1 %cmp1, label %if.then, label %if.end + +; X64: # %bb.2: # %for.body +; X64-NEXT: # in Loop: Header=BB0_1 Depth=1 +; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64-CBFX-NEXT: lfence +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: shrl $31, %ecx +; X64-NEXT: addl %eax, %ecx +; X64-NEXT: andl $-2, %ecx +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: jne .LBB0_4 + +if.then: ; preds = %for.body + %3 = load i32**, i32*** %secret.addr, align 8 + %4 = load i32, i32* %ret_val, align 4 + %idxprom = sext i32 %4 to i64 + %arrayidx = getelementptr inbounds i32*, i32** %3, i64 %idxprom + %5 = load i32*, i32** %arrayidx, align 8 + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %ret_val, align 4 + br label %if.end + +; X64: # %bb.3: # %if.then +; X64-NEXT: # in Loop: Header=BB0_1 Depth=1 +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; X64-CBFX-NEXT: lfence +; X64-FX-NEXT: lfence +; X64-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx +; X64-CBFX-NEXT: lfence +; X64-FX-NEXT: lfence +; X64-NEXT: movq (%rax,%rcx,8), %rax +; X64-NEXT: lfence +; X64-NEXT: movl (%rax), %eax +; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; X64-NEXT: jmp .LBB0_4 + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %7 = load i32, i32* %i, align 4 + %inc = add nsw i32 %7, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %8 = load i32, i32* %ret_val, align 4 + ret i32 %8 +} + +; Function Attrs: nounwind +declare void @llvm.x86.sse2.lfence() #1 + +attributes #0 = { "target-features"="+lvi-load-hardening" } +attributes #1 = { nounwind }