Changeset View
Standalone View
llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
//==-- X86LoadValueInjectionLoadHardening.cpp - LVI load hardening for x86 --=// | //==-- X86LoadValueInjectionLoadHardening.cpp - LVI load hardening for x86 --=// | ||||
// | // | ||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
// See https://llvm.org/LICENSE.txt for license information. | // See https://llvm.org/LICENSE.txt for license information. | ||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
// | // | ||||
//===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||
/// | /// | ||||
/// Description: This pass finds Load Value Injection (LVI) gadgets consisting | /// Description: This pass finds Load Value Injection (LVI) gadgets consisting | ||||
/// of a load from memory (i.e., SOURCE), and any operation that may transmit | /// of a load from memory (i.e., SOURCE), and any operation that may transmit | ||||
/// the value loaded from memory over a covert channel, or use the value loaded | /// the value loaded from memory over a covert channel, or use the value loaded | ||||
/// from memory to determine a branch/call target (i.e., SINK). | /// from memory to determine a branch/call target (i.e., SINK). After finding | ||||
/// all such gadgets in a given function, the pass minimally inserts LFENCE | |||||
/// instructions in such a manner that the following property is satisfied: for | |||||
/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at | |||||
/// least one LFENCE instruction. The algorithm that implements this minimal | |||||
/// insertion is influenced by an academic paper that minimally inserts memory | |||||
/// fences for high-performance concurrent programs: | |||||
/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf | |||||
/// The algorithm implemented in this pass is as follows: | |||||
/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the | |||||
/// following components: | |||||
/// - SOURCE instructions (also includes function arguments) | |||||
/// - SINK instructions | |||||
/// - Basic block entry points | |||||
/// - Basic block terminators | |||||
/// - LFENCE instructions | |||||
/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e., | |||||
/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been | |||||
/// mitigated, go to step 6. | |||||
/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion. | |||||
/// 4. Insert one LFENCE along each CFG edge that was cut in step 3. | |||||
/// 5. Go to step 2. | |||||
/// 6. If any LFENCEs were inserted, return `true` from runOnFunction() to tell | |||||
/// LLVM that the function was modified. | |||||
/// | /// | ||||
//===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||
#include "ImmutableGraph.h" | #include "ImmutableGraph.h" | ||||
#include "X86.h" | #include "X86.h" | ||||
#include "X86Subtarget.h" | #include "X86Subtarget.h" | ||||
#include "X86TargetMachine.h" | #include "X86TargetMachine.h" | ||||
#include "llvm/ADT/DenseMap.h" | #include "llvm/ADT/DenseMap.h" | ||||
Show All 11 Lines | |||||
#include "llvm/CodeGen/MachineLoopInfo.h" | #include "llvm/CodeGen/MachineLoopInfo.h" | ||||
#include "llvm/CodeGen/MachineRegisterInfo.h" | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||||
#include "llvm/CodeGen/RDFGraph.h" | #include "llvm/CodeGen/RDFGraph.h" | ||||
#include "llvm/CodeGen/RDFLiveness.h" | #include "llvm/CodeGen/RDFLiveness.h" | ||||
#include "llvm/InitializePasses.h" | #include "llvm/InitializePasses.h" | ||||
#include "llvm/Support/CommandLine.h" | #include "llvm/Support/CommandLine.h" | ||||
#include "llvm/Support/DOTGraphTraits.h" | #include "llvm/Support/DOTGraphTraits.h" | ||||
#include "llvm/Support/Debug.h" | #include "llvm/Support/Debug.h" | ||||
#include "llvm/Support/DynamicLibrary.h" | |||||
#include "llvm/Support/GraphWriter.h" | #include "llvm/Support/GraphWriter.h" | ||||
#include "llvm/Support/raw_ostream.h" | #include "llvm/Support/raw_ostream.h" | ||||
using namespace llvm; | using namespace llvm; | ||||
#define PASS_KEY "x86-lvi-load" | #define PASS_KEY "x86-lvi-load" | ||||
#define DEBUG_TYPE PASS_KEY | #define DEBUG_TYPE PASS_KEY | ||||
STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation"); | |||||
STATISTIC(NumFunctionsConsidered, "Number of functions analyzed"); | STATISTIC(NumFunctionsConsidered, "Number of functions analyzed"); | ||||
STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations " | STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations " | ||||
"were deployed"); | "were deployed"); | ||||
STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis"); | STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis"); | ||||
static cl::opt<std::string> OptimizePluginPath( | |||||
PASS_KEY "-opt-plugin", | |||||
cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden); | |||||
static cl::opt<bool> NoConditionalBranches( | static cl::opt<bool> NoConditionalBranches( | ||||
PASS_KEY "-no-cbranch", | PASS_KEY "-no-cbranch", | ||||
cl::desc("Don't treat conditional branches as disclosure gadgets. This " | cl::desc("Don't treat conditional branches as disclosure gadgets. This " | ||||
"may improve performance, at the cost of security."), | "may improve performance, at the cost of security."), | ||||
cl::init(false), cl::Hidden); | cl::init(false), cl::Hidden); | ||||
static cl::opt<bool> EmitDot( | static cl::opt<bool> EmitDot( | ||||
PASS_KEY "-dot", | PASS_KEY "-dot", | ||||
Show All 14 Lines | static cl::opt<bool> EmitDotVerify( | ||||
cl::init(false), cl::Hidden); | cl::init(false), cl::Hidden); | ||||
static cl::opt<bool> NoFixedLoads( | static cl::opt<bool> NoFixedLoads( | ||||
PASS_KEY "-no-fixed", | PASS_KEY "-no-fixed", | ||||
cl::desc("Don't mitigate RIP-relative or RSP-relative loads. This " | cl::desc("Don't mitigate RIP-relative or RSP-relative loads. This " | ||||
"may improve performance, at the cost of security."), | "may improve performance, at the cost of security."), | ||||
cl::init(false), cl::Hidden); | cl::init(false), cl::Hidden); | ||||
static llvm::sys::DynamicLibrary OptimizeDL{}; | |||||
typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size, | |||||
unsigned int *edges, int *edge_values, | |||||
int *cut_edges /* out */, unsigned int edges_size); | |||||
static OptimizeCutT OptimizeCut = nullptr; | |||||
#define ARG_NODE nullptr | #define ARG_NODE nullptr | ||||
#define GADGET_EDGE ((int)(-1)) | #define GADGET_EDGE ((int)(-1)) | ||||
#define WEIGHT(EdgeValue) ((double)(2 * (EdgeValue) + 1)) | #define WEIGHT(EdgeValue) ((double)(2 * (EdgeValue) + 1)) | ||||
namespace { | namespace { | ||||
class X86LoadValueInjectionLoadHardeningPass : public MachineFunctionPass { | class X86LoadValueInjectionLoadHardeningPass : public MachineFunctionPass { | ||||
public: | public: | ||||
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | private: | ||||
const TargetInstrInfo *TII; | const TargetInstrInfo *TII; | ||||
const TargetRegisterInfo *TRI; | const TargetRegisterInfo *TRI; | ||||
int hardenLoads(MachineFunction &MF, bool Fixed) const; | int hardenLoads(MachineFunction &MF, bool Fixed) const; | ||||
std::unique_ptr<MachineGadgetGraph> | std::unique_ptr<MachineGadgetGraph> | ||||
getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI, | getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI, | ||||
const MachineDominatorTree &MDT, | const MachineDominatorTree &MDT, | ||||
const MachineDominanceFrontier &MDF, bool FixedLoads) const; | const MachineDominanceFrontier &MDF, bool FixedLoads) const; | ||||
std::unique_ptr<MachineGadgetGraph> | |||||
elimEdges(std::unique_ptr<MachineGadgetGraph> Graph) const; | |||||
mattdr: In the implementation the latter two parameters are called `ElimEdges` and `ElimNodes`, which I… | |||||
void cutEdges(MachineGadgetGraph &G, EdgeSet &CutEdges /* out */) const; | |||||
int insertFences(MachineGadgetGraph &G, | |||||
EdgeSet &CutEdges /* in, out */) const; | |||||
bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const; | bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const; | ||||
bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const; | bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const; | ||||
template <unsigned K> bool hasLoadFrom(const MachineInstr &MI) const; | template <unsigned K> bool hasLoadFrom(const MachineInstr &MI) const; | ||||
bool instrAccessesStackSlot(const MachineInstr &MI) const; | bool instrAccessesStackSlot(const MachineInstr &MI) const; | ||||
bool instrAccessesConstantPool(const MachineInstr &MI) const; | bool instrAccessesConstantPool(const MachineInstr &MI) const; | ||||
bool instrAccessesGOT(const MachineInstr &MI) const; | bool instrAccessesGOT(const MachineInstr &MI) const; | ||||
inline bool instrIsFixedAccess(const MachineInstr &MI) const { | inline bool instrIsFixedAccess(const MachineInstr &MI) const { | ||||
▲ Show 20 Lines • Show All 86 Lines • ▼ Show 20 Lines | bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction( | ||||
const Function &F = MF.getFunction(); | const Function &F = MF.getFunction(); | ||||
if (!F.hasOptNone() && skipFunction(F)) | if (!F.hasOptNone() && skipFunction(F)) | ||||
return false; | return false; | ||||
++NumFunctionsConsidered; | ++NumFunctionsConsidered; | ||||
TII = STI->getInstrInfo(); | TII = STI->getInstrInfo(); | ||||
TRI = STI->getRegisterInfo(); | TRI = STI->getRegisterInfo(); | ||||
LLVM_DEBUG(dbgs() << "Hardening data-dependent loads...\n"); | LLVM_DEBUG(dbgs() << "Hardening data-dependent loads...\n"); | ||||
hardenLoads(MF, false); | int FencesInserted = hardenLoads(MF, false); | ||||
LLVM_DEBUG(dbgs() << "Hardening data-dependent loads... Done\n"); | LLVM_DEBUG(dbgs() << "Hardening data-dependent loads... Done\n"); | ||||
if (!NoFixedLoads) { | if (!NoFixedLoads) { | ||||
LLVM_DEBUG(dbgs() << "Hardening fixed loads...\n"); | LLVM_DEBUG(dbgs() << "Hardening fixed loads...\n"); | ||||
hardenLoads(MF, true); | FencesInserted += hardenLoads(MF, true); | ||||
Should this also have FencesInserted = hardenLoads? zbrid: Should this also have FencesInserted = hardenLoads? | |||||
LLVM_DEBUG(dbgs() << "Hardening fixed loads... Done\n"); | LLVM_DEBUG(dbgs() << "Hardening fixed loads... Done\n"); | ||||
} | } | ||||
return false; | if (FencesInserted > 0) | ||||
++NumFunctionsMitigated; | |||||
NumFences += FencesInserted; | |||||
return (FencesInserted > 0); | |||||
} | } | ||||
// Apply the mitigation to `MF`, return the number of fences inserted. | // Apply the mitigation to `MF`, return the number of fences inserted. | ||||
// If `FixedLoads` is `true`, then the mitigation will be applied to fixed | // If `FixedLoads` is `true`, then the mitigation will be applied to fixed | ||||
// loads; otherwise, mitigation will be applied to non-fixed loads. | // loads; otherwise, mitigation will be applied to non-fixed loads. | ||||
int X86LoadValueInjectionLoadHardeningPass::hardenLoads(MachineFunction &MF, | int X86LoadValueInjectionLoadHardeningPass::hardenLoads(MachineFunction &MF, | ||||
bool FixedLoads) const { | bool FixedLoads) const { | ||||
int FencesInserted = 0; | |||||
LLVM_DEBUG(dbgs() << "Building gadget graph...\n"); | LLVM_DEBUG(dbgs() << "Building gadget graph...\n"); | ||||
const auto &MLI = getAnalysis<MachineLoopInfo>(); | const auto &MLI = getAnalysis<MachineLoopInfo>(); | ||||
const auto &MDT = getAnalysis<MachineDominatorTree>(); | const auto &MDT = getAnalysis<MachineDominatorTree>(); | ||||
const auto &MDF = getAnalysis<MachineDominanceFrontier>(); | const auto &MDF = getAnalysis<MachineDominanceFrontier>(); | ||||
std::unique_ptr<MachineGadgetGraph> Graph = | std::unique_ptr<MachineGadgetGraph> Graph = | ||||
getGadgetGraph(MF, MLI, MDT, MDF, FixedLoads); | getGadgetGraph(MF, MLI, MDT, MDF, FixedLoads); | ||||
LLVM_DEBUG(dbgs() << "Building gadget graph... Done\n"); | LLVM_DEBUG(dbgs() << "Building gadget graph... Done\n"); | ||||
if (Graph == nullptr) | if (Graph == nullptr) | ||||
Show All 17 Lines | if (FileError) | ||||
errs() << FileError.message(); | errs() << FileError.message(); | ||||
WriteGraph(FileOut, Graph.get()); | WriteGraph(FileOut, Graph.get()); | ||||
FileOut.close(); | FileOut.close(); | ||||
LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n"); | LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n"); | ||||
if (EmitDotOnly) | if (EmitDotOnly) | ||||
return 0; | return 0; | ||||
} | } | ||||
return 0; | do { | ||||
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n"); | |||||
std::unique_ptr<MachineGadgetGraph> ElimGraph = elimEdges(std::move(Graph)); | |||||
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n"); | |||||
if (ElimGraph->NumGadgets == 0) | |||||
break; | |||||
EdgeSet CutEdges{*ElimGraph}; | |||||
LLVM_DEBUG(dbgs() << "Cutting edges...\n"); | |||||
cutEdges(*ElimGraph, CutEdges); | |||||
LLVM_DEBUG(dbgs() << "Cutting edges... Done\n"); | |||||
LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n"); | |||||
FencesInserted += insertFences(*ElimGraph, CutEdges); | |||||
LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n"); | |||||
Graph.reset(GraphBuilder::trim( | |||||
Not Done ReplyInline ActionsI know there was a lot of effort to make ImmutableGraph efficient, but this is an O(N) reallocation on every loop, right? In practice, how many times do we end up looping before we hit a fixed point? mattdr: I know there was a lot of effort to make `ImmutableGraph` efficient, but this is an `O(N)`… | |||||
The answers to your questions are "yes" and "until all of the gadgets have been mitigated". The intent here is to optimize for the MILP plugin: What I have done is rewrite the Greedy heuristic to keep track of which edges have been cut and eliminated, thus never actually having to rebuild the graph. sconstab: The answers to your questions are "yes" and "until all of the gadgets have been mitigated". The… | |||||
Not Done ReplyInline ActionsDid you mean to remove this call to GraphBuilder::trim()? As it stands right now the loop actually reallocates the graph twice per iteration -- once in trimMitigatedEdges, once in this call to trim(). mattdr: Did you mean to remove this call to `GraphBuilder::trim()`? As it stands right now the loop… | |||||
*ElimGraph, MachineGadgetGraph::NodeSet{*ElimGraph}, CutEdges)); | |||||
} while (true); | |||||
return FencesInserted; | |||||
} | } | ||||
std::unique_ptr<X86LoadValueInjectionLoadHardeningPass::MachineGadgetGraph> | std::unique_ptr<X86LoadValueInjectionLoadHardeningPass::MachineGadgetGraph> | ||||
X86LoadValueInjectionLoadHardeningPass::getGadgetGraph( | X86LoadValueInjectionLoadHardeningPass::getGadgetGraph( | ||||
MachineFunction &MF, const MachineLoopInfo &MLI, | MachineFunction &MF, const MachineLoopInfo &MLI, | ||||
const MachineDominatorTree &MDT, const MachineDominanceFrontier &MDF, | const MachineDominatorTree &MDT, const MachineDominanceFrontier &MDF, | ||||
bool FixedLoads) const { | bool FixedLoads) const { | ||||
using namespace rdf; | using namespace rdf; | ||||
▲ Show 20 Lines • Show All 73 Lines • ▼ Show 20 Lines | std::function<void(NodeAddr<DefNode *>)> AnalyzeDefUseChain = | ||||
Uses.push_back(Use); | Uses.push_back(Use); | ||||
} | } | ||||
} | } | ||||
for (auto N : Uses) { | for (auto N : Uses) { | ||||
NodeAddr<UseNode *> Use{N}; | NodeAddr<UseNode *> Use{N}; | ||||
if (NodesVisited.insert(Use.Id).second && AnalyzeUse(Use)) { | if (NodesVisited.insert(Use.Id).second && AnalyzeUse(Use)) { | ||||
NodeAddr<InstrNode *> Owner{Use.Addr->getOwner(DFG)}; | NodeAddr<InstrNode *> Owner{Use.Addr->getOwner(DFG)}; | ||||
NodeList Defs = Owner.Addr->members_if(DataFlowGraph::IsDef, DFG); | NodeList Defs = Owner.Addr->members_if(DataFlowGraph::IsDef, DFG); | ||||
std::for_each(Defs.begin(), Defs.end(), AnalyzeDefUseChain); | std::for_each(Defs.begin(), Defs.end(), AnalyzeDefUseChain); | ||||
doh this should be in the other patch. craig.topper: doh this should be in the other patch. | |||||
} | } | ||||
} | } | ||||
}; | }; | ||||
AnalyzeDefUseChain(Def); | AnalyzeDefUseChain(Def); | ||||
}; | }; | ||||
LLVM_DEBUG(dbgs() << "Analyzing def-use chains to find gadgets\n"); | LLVM_DEBUG(dbgs() << "Analyzing def-use chains to find gadgets\n"); | ||||
// Analyze function arguments | // Analyze function arguments | ||||
▲ Show 20 Lines • Show All 65 Lines • ▼ Show 20 Lines | X86LoadValueInjectionLoadHardeningPass::getGadgetGraph( | ||||
// ARG_NODE is a pseudo-instruction that represents MF args in the GadgetGraph | // ARG_NODE is a pseudo-instruction that represents MF args in the GadgetGraph | ||||
GraphIter ArgNode = MaybeAddNode(ARG_NODE).first; | GraphIter ArgNode = MaybeAddNode(ARG_NODE).first; | ||||
TraverseCFG(&MF.front(), ArgNode, 0); | TraverseCFG(&MF.front(), ArgNode, 0); | ||||
std::unique_ptr<MachineGadgetGraph> G{Builder.get(FenceCount, GadgetCount)}; | std::unique_ptr<MachineGadgetGraph> G{Builder.get(FenceCount, GadgetCount)}; | ||||
LLVM_DEBUG(dbgs() << "Found " << GTraits::size(G.get()) << " nodes\n"); | LLVM_DEBUG(dbgs() << "Found " << GTraits::size(G.get()) << " nodes\n"); | ||||
return G; | return G; | ||||
} | } | ||||
std::unique_ptr<X86LoadValueInjectionLoadHardeningPass::MachineGadgetGraph> | |||||
X86LoadValueInjectionLoadHardeningPass::elimEdges( | |||||
Could be more descriptive to call this, say, trimMitigatedEdges mattdr: Could be more descriptive to call this, say, `trimMitigatedEdges` | |||||
Yes, done. sconstab: Yes, done. | |||||
This is a lot clearer now, thanks! Given what this is doing I think it makes sense to call it findMitigatedEdges. mattdr: This is a lot clearer now, thanks!
Given what this is doing I think it makes sense to call it… | |||||
I changed it to elimMitigatedEdgesAndNodes() sconstab: I changed it to `elimMitigatedEdgesAndNodes()` | |||||
std::unique_ptr<MachineGadgetGraph> Graph) const { | |||||
MachineGadgetGraph::NodeSet ElimNodes{*Graph}; | |||||
MachineGadgetGraph::EdgeSet ElimEdges{*Graph}; | |||||
"Eliminate CFG edges that target a fence, as they're trivially mitigated" mattdr: "Eliminate CFG edges that target a fence, as they're trivially mitigated" | |||||
if (Graph->NumFences > 0) { // eliminate fences | |||||
for (auto EI = Graph->edges_begin(), EE = Graph->edges_end(); EI != EE; | |||||
Not Done ReplyInline ActionsWould EdgeRef E : Graph->edges() be clearer here? Ditto for many other for loops. (not sure if the LLVM conventions dictate something specific for these loops) sconstab: Would `EdgeRef E : Graph->edges()` be clearer here? Ditto for many other `for` loops.
(not… | |||||
Personally, I'd prefer not to hide the & or *. And EdgeRef/NodeRef only exist in the Traits class not the main class. It's also confusing that NodeRef is a pointer and not a reference so I'd like to not leak that outside the Traits class where its easier to see. LLVM is pretty conservative about the use of auto, but I figured in this case it wouldn't be unreasonable for a reader to understand that edges() returns Edge objects. But I'm happing to change it to MachineGadgetGraph::Edge. craig.topper: Personally, I'd prefer not to hide the & or *. And EdgeRef/NodeRef only exist in the Traits… | |||||
++EI) { | |||||
Not Done ReplyInline ActionsHow are we sure we've removed all edges that pointed to this same destination node? mattdr: How are we sure we've removed all edges that pointed to this same destination node? | |||||
I think it does? This loop iterates through ALL edges. For each CFG edge that ingresses a fence, add that fence to ElimNodes, and add that edge and all egress CFG edges to ElimEdges. The loop does *not* skip over edges that have been added to ElimEdges, and therefore I think this should ensure that all CFG edges pointing to a given fence are removed. sconstab: I think it does? This loop iterates through ALL edges. For each CFG edge that ingresses a fence… | |||||
Sure enough, that makes sense. mattdr: Sure enough, that makes sense. | |||||
GTraits::NodeRef Dest = GTraits::edge_dest(*EI); | |||||
Not Done ReplyInline ActionsIn what circumstance will we add fences to the graph as sources or sinks? Can we just avoid that at the point of insertion, rather than running this extra culling pass on every iteration? mattdr: In what circumstance will we add fences to the graph as sources or sinks? Can we just avoid… | |||||
Fences are never added as sources or sinks. We need to know where the fences are so that we can eliminate gadget edges for which all CFG paths from source to sink cross a fence. We could perform this analysis at the point of insertion, i.e., during the getGadgetGraph() function, BUT this analysis would be much more expensive there because it would be performed on the actual MachineFunction, instead of on the gadget graph. sconstab: Fences are never added as sources or sinks. We need to know where the fences are so that we can… | |||||
if (isFence(Dest->value())) { | |||||
ElimNodes.insert(Dest); | |||||
ElimEdges.insert(EI); | |||||
Not Done ReplyInline ActionsThis caught me off guard the first time I saw it because I didn't realize that LLVM had its own implementation that takes a range. For readability, would it be better to prefix with llvm::? sconstab: This caught me off guard the first time I saw it because I didn't realize that LLVM had its own… | |||||
Agreed. I'll change. craig.topper: Agreed. I'll change. | |||||
std::for_each( | |||||
GTraits::child_edge_begin(Dest), GTraits::child_edge_end(Dest), | |||||
[&ElimEdges](GTraits::EdgeRef E) { ElimEdges.insert(&E); }); | |||||
} | |||||
} | |||||
LLVM_DEBUG(dbgs() << "Eliminated " << ElimNodes.count() | |||||
<< " fence nodes\n"); | |||||
} | |||||
To make it easier to follow, let's call this something like ReachableGadgetSinkNodes mattdr: To make it easier to follow, let's call this something like `ReachableGadgetSinkNodes` | |||||
But that isn't accurate.. The algorithm finds all reachable nodes, not just gadget sinks. I renamed to ReachableNodes. sconstab: But that isn't accurate.. The algorithm finds all reachable nodes, not just gadget sinks. I… | |||||
// eliminate gadget edges that are mitigated | |||||
int NumGadgets = 0; | |||||
Not Done ReplyInline ActionsI think this is just intended as an optimization -- it's not necessary for correctness. Assuming that's right, suggest removing it since it doesn't really make things faster but it does add some extra complexity. mattdr: I think this is just intended as an optimization -- it's not necessary for correctness. | |||||
Why do you think it doesn't make things faster? A majority of nodes in the graph are not gadget sinks, and this majority tends to grow rapidly as gadgets become mitigated. Each time this check passes, it saves an entire DFS traversal and one O(E) loop through the edges. sconstab: Why do you think it doesn't make things faster? A majority of nodes in the graph are not gadget… | |||||
Ack! Yes, it seems I consistently misread this function and missed the difference between isGadgetEdge in some places and IsCFGEdge in others. (As I mentioned, having both represented in the same graph is confusing.) Now I see this is a different check than is done in the DFS. mattdr: Ack! Yes, it seems I consistently misread this function and missed the difference between… | |||||
MachineGadgetGraph::NodeSet Visited{*Graph}, GadgetSinks{*Graph}; | |||||
MachineGadgetGraph::EdgeSet ElimGadgets{*Graph}; | |||||
for (auto NI = GTraits::nodes_begin(Graph.get()), | |||||
Maybe FindCFGReachableGadgetSinksDFS? mattdr: Maybe `FindCFGReachableGadgetSinksDFS`? | |||||
Similar to above, this is not accurate as it finds all reachable nodes, not just gadget sinks. I did rename to FindReachableNodes and added detail to the comment. sconstab: Similar to above, this is not accurate as it finds all reachable nodes, not just gadget sinks. | |||||
NE = GTraits::nodes_end(Graph.get()); | |||||
Not Done ReplyInline ActionsI would write something stronger here: Start off assuming all gadgets originating at this source node have been mitigated and can be removed. Later we will add back unmitigated gadgets by erasing them from the removal set. mattdr: I would write something stronger here:
Start off assuming all gadgets originating at this… | |||||
Well, I substantially rewrote this algorithm to build a set of mitigated edges, instead of trimming down a set of mitigated edges. I think that the new set of comments should be much easier to follow. sconstab: Well, I substantially rewrote this algorithm to build a set of mitigated edges, instead of… | |||||
NI != NE; ++NI) { | |||||
// collect the gadgets for this node | |||||
for (auto EI = GTraits::child_edge_begin(*NI), | |||||
EE = GTraits::child_edge_end(*NI); | |||||
EI != EE; ++EI) { | |||||
if (MachineGadgetGraph::isGadgetEdge(*EI)) { | |||||
++NumGadgets; | |||||
ElimGadgets.insert(EI); | |||||
GadgetSinks.insert(GTraits::edge_dest(*EI)); | |||||
} | |||||
Not Done ReplyInline ActionsThis is really, really hard to read and understand. I think that's in large part because we have this one graph that represents _both_ control-flow _and_ source/sink pairs. Given that it's the load-bearing part of the whole stack, let me offer the best way I've come up with to explain it, then a suggestion for making it simpler to follow. My summary based on a few readings: First we compute GadgetSinks, the set of gadget sinks whose source is the current root. Then we do a depth-first search of the control-flow graph to find all gadget sinks that are control-flow-reachable from the given root. When we find such a sink, we look to see if it is also in GadgetSinks -- again, a sink whose source is the current root -- at which point we know we have found an unmitigated gadget sink. We iterate through the root's gadget edges to find the specific edge that points to the current DFS node -- the unmitigated gadget sink -- and remove that edge from ElimGadgets, where we had previously added it on the presumption it was mitigated. One idea for making this easier to follow:
mattdr: This is really, really hard to read and understand. I think that's in large part because we… | |||||
You of course are correct that this was terribly difficult to follow. I actually found an even simpler solution that what you had suggested. For each RootN that is the source of at least one gadget, I DFS to find all of the Visited nodes, and then check to see which node members of Visited are also the destinations of a gadget edge rooted at RootN. sconstab: You of course are correct that this was terribly difficult to follow. I actually found an even… | |||||
} | |||||
Not Done ReplyInline ActionsShould this be MachineGadgetGraph::NodeRef? sconstab: Should this be `MachineGadgetGraph::NodeRef`? | |||||
if (GadgetSinks.empty()) | |||||
Not Done ReplyInline ActionsWe already capture RootN later on. We can remove the boolean conditional and make this more readable by writing if (*N != RootN) (maybe if (N != &RootN)? I've sort of lost track of the API for nodes and edges.) Pushing further: maybe we can just remove the tiny optimization we get by special-casing the root node in the interest of simplicity? mattdr: We already capture `RootN` later on. We can remove the boolean conditional and make this more… | |||||
Not Done ReplyInline ActionsStill think it makes sense to add the root node to the reachable set (as it's trivially reachable from itself) mattdr: Still think it makes sense to add the root node to the reachable set (as it's trivially… | |||||
continue; | |||||
std::function<void(GTraits::NodeRef, bool)> TraverseDFS = | |||||
[&](GTraits::NodeRef N, bool FirstNode) { | |||||
if (!FirstNode) { | |||||
Visited.insert(N); | |||||
if (GadgetSinks.contains(N)) { | |||||
for (auto CEI = GTraits::child_edge_begin(*NI), | |||||
CEE = GTraits::child_edge_end(*NI); | |||||
CEI != CEE; ++CEI) { | |||||
if (MachineGadgetGraph::isGadgetEdge(*CEI) && | |||||
GTraits::edge_dest(*CEI) == N) | |||||
Suggest putting this at the top of the loop so it's obvious from the beginning that results aren't intended to accrue from iteration to iteration. mattdr: Suggest putting this at the top of the loop so it's obvious from the beginning that results… | |||||
ElimGadgets.erase(CEI); | |||||
} | |||||
} | |||||
} | |||||
for (auto CEI = GTraits::child_edge_begin(N), | |||||
CEE = GTraits::child_edge_end(N); | |||||
CEI != CEE; ++CEI) { | |||||
GTraits::NodeRef Dest = GTraits::edge_dest(*CEI); | |||||
if (MachineGadgetGraph::isCFGEdge(*CEI) && | |||||
!Visited.contains(Dest) && !ElimEdges.contains(CEI)) | |||||
TraverseDFS(Dest, false); | |||||
} | |||||
}; | |||||
Not Done ReplyInline ActionsIt's weird to have a negated predicate like this in a conditional with an else. Let's either rewrite it or un-negate it and flip the if/else blocks. Rewrite: !ElimEdges.empty() || !ElimNodes.empty() mattdr: It's weird to have a negated predicate like this in a conditional with an `else`. Let's either… | |||||
Right. By the way, I decomposed elimEdges() into two functions:
sconstab: Right. By the way, I decomposed `elimEdges()` into two functions:
- `elimEdges()` just runs the… | |||||
TraverseDFS(*NI, true); | |||||
Visited.clear(); | |||||
GadgetSinks.clear(); | |||||
} | |||||
LLVM_DEBUG(dbgs() << "Eliminated " << ElimGadgets.count() | |||||
<< " gadget edges\n"); | |||||
ElimEdges |= ElimGadgets; | |||||
if (!(ElimEdges.empty() && ElimNodes.empty())) { | |||||
int NumRemainingGadgets = NumGadgets - ElimGadgets.count(); | |||||
Graph.reset(GraphBuilder::trim(*Graph, ElimNodes, ElimEdges, | |||||
likewise findEdgesToCut or similar mattdr: likewise `findEdgesToCut` or similar | |||||
0 /* NumFences */, NumRemainingGadgets)); | |||||
} else { | |||||
Graph->NumFences = 0; | |||||
I'm still not convinced it's worth the extra (untestable) complexity to add this plugin point, but I defer to the committer. mattdr: I'm still not convinced it's worth the extra (untestable) complexity to add this plugin point… | |||||
Graph->NumGadgets = NumGadgets; | |||||
} | |||||
Extra {} should be unnecessary mattdr: Extra `{}` should be unnecessary | |||||
return Graph; | |||||
} | |||||
void X86LoadValueInjectionLoadHardeningPass::cutEdges( | |||||
MachineGadgetGraph &G, | |||||
MachineGadgetGraph::EdgeSet &CutEdges /* out */) const { | |||||
if (!OptimizePluginPath.empty()) { | |||||
if (!OptimizeDL.isValid()) { | |||||
std::string ErrorMsg{}; | |||||
OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary( | |||||
OptimizePluginPath.c_str(), &ErrorMsg); | |||||
if (!ErrorMsg.empty()) | |||||
Not Done ReplyInline ActionsI think that std::unique_ptr<T[]> should work fine here... sconstab: I think that `std::unique_ptr<T[]>` should work fine here... | |||||
Agreed, but std::vector is far more common in the codebase and since it lives on the stack the extra capacity pointer shouldn't be a big deal. craig.topper: Agreed, but std::vector is far more common in the codebase and since it lives on the stack the… | |||||
report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"'); | |||||
OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut"); | |||||
if (!OptimizeCut) | |||||
I'm thinking about adding a method to Graph to get a node/edge's index so we can hide all these std::distance calls. craig.topper: I'm thinking about adding a method to Graph to get a node/edge's index so we can hide all these… | |||||
report_fatal_error("Invalid optimization plugin"); | |||||
} | |||||
auto *Nodes = new unsigned int[G.nodes_size() + 1 /* terminator node */]; | |||||
auto *Edges = new unsigned int[G.edges_size()]; | |||||
auto *EdgeCuts = new int[G.edges_size()]; | |||||
auto *EdgeValues = new int[G.edges_size()]; | |||||
Not Done ReplyInline ActionsWhy aren't these vectors? mattdr: Why aren't these `vector`s? | |||||
Not Done ReplyInline ActionsThey're being passed to a C-compatible interface, so intuitively it made more sense to me. I also get paranoid because the array sizes will be copied into the std::vector struct, and from experience it seems I can never be 100% certain these copies will be optimized away. sconstab: They're being passed to a C-compatible interface, so intuitively it made more sense to me. I… | |||||
Not Done ReplyInline Actions@mattdr (above) sconstab: @mattdr (above) | |||||
for (auto *NI = G.nodes_begin(), *NE = G.nodes_end(); NI != NE; ++NI) { | |||||
Nodes[std::distance(G.nodes_begin(), NI)] = | |||||
std::distance(G.edges_begin(), GTraits::child_edge_begin(NI)); | |||||
I really like that the refinement loop (trying to get to a fixed point) for the greedy algorithm is now entirely within this function. I'd suggest going one step further: take the loop out of runOnMachineFunction entirely and add one around the calls to the plugin in the lines above. That way we keep the implementation detail that "this needs to be run in a loop" as close to the actual algorithm as possible. mattdr: I really like that the refinement loop (trying to get to a fixed point) for the greedy… | |||||
} | |||||
Not Done ReplyInline ActionsI think this for can lose the brackets. sconstab: I think this `for` can lose the brackets. | |||||
Will do. craig.topper: Will do. | |||||
Nodes[G.nodes_size()] = G.edges_size(); // terminator node | |||||
for (auto *EI = G.edges_begin(), *EE = G.edges_end(); EI != EE; ++EI) { | |||||
Edges[std::distance(G.edges_begin(), EI)] = | |||||
std::distance(G.nodes_begin(), GTraits::edge_dest(*EI)); | |||||
EdgeValues[std::distance(G.edges_begin(), EI)] = EI->value(); | |||||
} | |||||
OptimizeCut(Nodes, G.nodes_size(), Edges, EdgeValues, EdgeCuts, | |||||
G.edges_size()); | |||||
for (int I = 0; I < G.edges_size(); ++I) { | |||||
Seems like NI is a vestige of a previous iteration mattdr: Seems like `NI` is a vestige of a previous iteration | |||||
if (EdgeCuts[I]) | |||||
CutEdges.set(I); | |||||
Not Done ReplyInline ActionsEvery edge touched by this loop will also be touched by the other leg of this loop at one point or another ("E is a CFG edge"). Can we find a way to avoid the O(E^2) inner loop? For example: what if we made the outermost loop edge-major? Keep sets of mitigated source-nodes and sink-nodes. Create a list of all edges by weight, sort it, then go through the edges from lowest to highest weight and use them to try to mitigate a new source or sink node. There's a good chance I'm missing something fundamental there, and I thank you in advance for your patience explaining it to me. But if I can I'd really like to avoid repeating the CheapestSoFar comparison on edge weights in two places. mattdr: Every edge touched by this loop will //also// be touched by the other leg of this loop at one… | |||||
Yes... in hindsight this whole algorithm was more than a bit sloppy on my part. I completely revamped the algorithm and I think that now it is O(N + E). Please check and make sure you agree. sconstab: Yes... in hindsight this whole algorithm was more than a bit sloppy on my part. I completely… | |||||
Not Done ReplyInline ActionsEach iteration loops over all edges and removes exactly one... so this is probably still O(E^2). Seems like we can get it down closer to O(E * lg E) if we:
The insight here is that edge weights don't change, so mitigating an edge doesn't change the ranking of other edges. That said, I'm happy with the readability of the current implementation and would be satisfied for now if we just add //FIXME: this is O(E^2), it could probably be O(E * lg E) with some work mattdr: Each iteration loops over all edges and removes exactly one... so this is probably still `O… | |||||
I think your description actually implies that this greedy heuristic cannot be less that O(E^2), right?
The "if it is still relevant" is an O(E) operation, so the whole thing should be O(E^2). Regardless, I did add a comment to this effect. sconstab: I think your description actually implies that this greedy heuristic cannot be less that `O… | |||||
} | |||||
delete[] Nodes; | |||||
delete[] Edges; | |||||
delete[] EdgeCuts; | |||||
delete[] EdgeValues; | |||||
} else { // Use the default greedy heuristic | |||||
// Find the cheapest CFG edge that will eliminate a gadget (by being egress | |||||
What guarantee do we have here that all of the gadget sinks have, in fact, been added to GadgetSinks? It seems to be up to the order in which we iterate through nodes and edges. mattdr: What guarantee do we have here that all of the gadget sinks have, in fact, been added to… | |||||
Yikes! Can't believe I overlooked this. I fixed the issue, and it looks like the algorithm had been cutting a few more edges in some cases than necessary. sconstab: Yikes! Can't believe I overlooked this. I fixed the issue, and it looks like the algorithm had… | |||||
// from a SOURCE node or ingress to a SINK node), and cut it. | |||||
NI also seems like it comes from a previous version mattdr: `NI` also seems like it comes from a previous version | |||||
MachineGadgetGraph::NodeSet GadgetSinks{G}; | |||||
MachineGadgetGraph::Edge *CheapestSoFar = nullptr; | |||||
for (auto NI = GTraits::nodes_begin(&G), NE = GTraits::nodes_end(&G); | |||||
NI != NE; ++NI) { | |||||
for (auto EI = GTraits::child_edge_begin(*NI), | |||||
EE = GTraits::child_edge_end(*NI); | |||||
EI != EE; ++EI) { | |||||
if (MachineGadgetGraph::isGadgetEdge(*EI)) { | |||||
// NI is a SOURCE node. Look for a cheap egress edge | |||||
for (auto EEI = GTraits::child_edge_begin(*NI); EEI != EE; ++EEI) { | |||||
if (MachineGadgetGraph::isCFGEdge(*EEI)) { | |||||
if (!CheapestSoFar || EEI->value() < CheapestSoFar->value()) | |||||
CheapestSoFar = EEI; | |||||
} | |||||
} | |||||
GadgetSinks.insert(GTraits::edge_dest(*EI)); | |||||
} else { // EI is a CFG edge | |||||
A comment for what this lambda does, and how it's intended to be used, is much appreciated for the top-to-bottom reader Probably something like: When we add an LFENCE before an instruction, remove any CFG edges that point to that instruction because they all now refer to a mitigated codepath mattdr: A comment for what this lambda does, and how it's intended to be used, is much appreciated for… | |||||
I simplified this by inlining lambda with a descriptive comment. sconstab: I simplified this by inlining lambda with a descriptive comment. | |||||
if (GadgetSinks.contains(GTraits::edge_dest(*EI))) { | |||||
// The dest is a SINK node. Hence EI is an ingress edge | |||||
if (!CheapestSoFar || EI->value() < CheapestSoFar->value()) | |||||
CheapestSoFar = EI; | |||||
Not Done ReplyInline ActionsMore places where NodeRef and EdgeRef might be clearer. sconstab: More places where `NodeRef` and `EdgeRef` might be clearer. | |||||
} | |||||
} | |||||
} | |||||
} | |||||
assert(CheapestSoFar && "Failed to cut an edge"); | |||||
CutEdges.insert(CheapestSoFar); | |||||
} | |||||
LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n"); | |||||
} | |||||
// Insert an LFENCE in this basic block mattdr: `// Insert an LFENCE in this basic block` | |||||
// ... at this point in the basic block mattdr: `// ... at this point in the basic block` | |||||
int X86LoadValueInjectionLoadHardeningPass::insertFences( | |||||
MachineGadgetGraph &G, EdgeSet &CutEdges /* in, out */) const { | |||||
int FencesInserted = 0, AdditionalEdgesCut = 0; | |||||
auto CutAllCFGEdges = [&CutEdges, &AdditionalEdgesCut](GTraits::NodeRef N) { | |||||
for (auto CEI = GTraits::child_edge_begin(N), | |||||
CEE = GTraits::child_edge_end(N); | |||||
CEI != CEE; ++CEI) { | |||||
if (MachineGadgetGraph::isCFGEdge(*CEI) && !CutEdges.contains(CEI)) { | |||||
CutEdges.insert(CEI); | |||||
++AdditionalEdgesCut; | |||||
} | |||||
} | |||||
}; | |||||
for (auto NI = GTraits::nodes_begin(&G), NE = GTraits::nodes_end(&G); | |||||
NI != NE; ++NI) { | |||||
for (auto CEI = GTraits::child_edge_begin(*NI), | |||||
CEE = GTraits::child_edge_end(*NI); | |||||
CEI != CEE; ++CEI) { | |||||
if (CutEdges.contains(CEI)) { | |||||
This one took me a bit. It seems like the summary is: add a fence unless it would be redundant, i.e. if we can see the next instruction and see it is itself a fence mattdr: This one took me a bit. It seems like the summary is: add a fence unless it would be redundant… | |||||
Right. I added a clarifying comment. sconstab: Right. I added a clarifying comment. | |||||
MachineInstr *MI = (*NI)->value(), *Prev; | |||||
MachineBasicBlock *MBB; | |||||
MachineBasicBlock::iterator InsertionPt; | |||||
if (MI == ARG_NODE) { // insert LFENCE at beginning of entry block | |||||
MBB = &G.getMF().front(); | |||||
InsertionPt = MBB->begin(); | |||||
Prev = nullptr; | |||||
} else if (MI->isBranch()) { // insert the LFENCE before the branch | |||||
MBB = MI->getParent(); | |||||
InsertionPt = MI; | |||||
You can skip the double-lookup and just do: if (MachineGadgetGraph::isCFGEdge(E)) { if (CutEdges.insert(E)) { // returns true if not already present ++AdditionalEdgesCut; } } mattdr: You can skip the double-lookup and just do:
```
if (MachineGadgetGraph::isCFGEdge(E)) {
if… | |||||
Prev = MI->getPrevNode(); | |||||
CutAllCFGEdges(*NI); | |||||
} else { // insert the LFENCE after the instruction | |||||
MBB = MI->getParent(); | |||||
InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end(); | |||||
Prev = InsertionPt == MBB->end() | |||||
? (MBB->empty() ? nullptr : &MBB->back()) | |||||
: InsertionPt->getPrevNode(); | |||||
} | |||||
if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) && | |||||
(!Prev || !isFence(Prev))) { | |||||
BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE)); | |||||
++FencesInserted; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n"); | |||||
LLVM_DEBUG(dbgs() << "Cut an additional " << AdditionalEdgesCut | |||||
<< " edges during fence insertion\n"); | |||||
return FencesInserted; | |||||
} | |||||
bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory( | bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory( | ||||
const MachineInstr &MI, unsigned Reg) const { | const MachineInstr &MI, unsigned Reg) const { | ||||
if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE || | if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE || | ||||
MI.getOpcode() == X86::SFENCE || MI.getOpcode() == X86::LFENCE) | MI.getOpcode() == X86::SFENCE || MI.getOpcode() == X86::LFENCE) | ||||
return false; | return false; | ||||
// FIXME: This does not handle pseudo loading instruction like TCRETURN* | // FIXME: This does not handle pseudo loading instruction like TCRETURN* | ||||
const MCInstrDesc &Desc = MI.getDesc(); | const MCInstrDesc &Desc = MI.getDesc(); | ||||
▲ Show 20 Lines • Show All 115 Lines • Show Last 20 Lines |
In the implementation the latter two parameters are called ElimEdges and ElimNodes, which I think are good names