Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2292,6 +2292,14 @@
   Group<m_Group>, Flags<[CoreOption,CC1Option]>;
 def mno_speculative_load_hardening : Flag<["-"], "mno-speculative-load-hardening">,
   Group<m_Group>, Flags<[CoreOption]>;
+def mlvi_hardening : Flag<["-"], "mlvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+  HelpText<"Enable all mitigations for Load Value Injection (LVI)">;
+def mno_lvi_hardening : Flag<["-"], "mno-lvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+  HelpText<"Disable mitigations for Load Value Injection (LVI)">;
+def mlvi_cfi : Flag<["-"], "mlvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+  HelpText<"Enable only control-flow mitigations for Load Value Injection (LVI)">;
+def mno_lvi_cfi : Flag<["-"], "mno-lvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+  HelpText<"Disable control-flow mitigations for Load Value Injection (LVI)">;
 def mrelax : Flag<["-"], "mrelax">, Group<m_riscv_Features_Group>,
   HelpText<"Enable linker relaxation">;
Index: clang/lib/Driver/ToolChains/Arch/X86.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -146,6 +146,7 @@
   // flags). This is a bit hacky but keeps existing usages working. We should
   // consider deprecating this and instead warn if the user requests external
   // retpoline thunks and *doesn't* request some form of retpolines.
+  auto SpectreOpt = clang::driver::options::ID::OPT_INVALID;
   if (Args.hasArgNoClaim(options::OPT_mretpoline, options::OPT_mno_retpoline,
                          options::OPT_mspeculative_load_hardening,
                          options::OPT_mno_speculative_load_hardening)) {
@@ -153,12 +154,14 @@
                      false)) {
       Features.push_back("+retpoline-indirect-calls");
       Features.push_back("+retpoline-indirect-branches");
+      SpectreOpt = options::OPT_mretpoline;
     } else if (Args.hasFlag(options::OPT_mspeculative_load_hardening,
                             options::OPT_mno_speculative_load_hardening,
                             false)) {
       // On x86, speculative load hardening relies on at least using retpolines
       // for indirect calls.
       Features.push_back("+retpoline-indirect-calls");
+      SpectreOpt = options::OPT_mspeculative_load_hardening;
     }
   } else if (Args.hasFlag(options::OPT_mretpoline_external_thunk,
                           options::OPT_mno_retpoline_external_thunk, false)) {
@@ -166,6 +169,26 @@
     // eventually switch to an error here.
     Features.push_back("+retpoline-indirect-calls");
     Features.push_back("+retpoline-indirect-branches");
+    SpectreOpt = options::OPT_mretpoline_external_thunk;
+  }
+
+  auto LVIOpt = clang::driver::options::ID::OPT_INVALID;
+  if (Args.hasFlag(options::OPT_mlvi_hardening, options::OPT_mno_lvi_hardening,
+                   false)) {
+    Features.push_back("+lvi-load-hardening");
+    Features.push_back("+lvi-cfi"); // load hardening implies CFI protection
+    LVIOpt = options::OPT_mlvi_hardening;
+  } else if (Args.hasFlag(options::OPT_mlvi_cfi, options::OPT_mno_lvi_cfi,
+                          false)) {
+    Features.push_back("+lvi-cfi");
+    LVIOpt = options::OPT_mlvi_cfi;
+  }
+
+  if (SpectreOpt != clang::driver::options::ID::OPT_INVALID &&
+      LVIOpt != clang::driver::options::ID::OPT_INVALID) {
+    D.Diag(diag::err_drv_argument_not_allowed_with)
+        << D.getOpts().getOptionName(SpectreOpt)
+        << D.getOpts().getOptionName(LVIOpt);
   }
 
   // Now add any that the user explicitly requested on the command line,
Index: llvm/lib/CodeGen/CMakeLists.txt
===================================================================
--- llvm/lib/CodeGen/CMakeLists.txt
+++ llvm/lib/CodeGen/CMakeLists.txt
@@ -115,6 +115,9 @@
   ProcessImplicitDefs.cpp
   PrologEpilogInserter.cpp
   PseudoSourceValue.cpp
+  RDFGraph.cpp
+  RDFLiveness.cpp
+  RDFRegisters.cpp
   ReachingDefAnalysis.cpp
   RegAllocBase.cpp
   RegAllocBasic.cpp
Index: llvm/lib/CodeGen/RDFGraph.cpp
===================================================================
--- llvm/lib/CodeGen/RDFGraph.cpp
+++ llvm/lib/CodeGen/RDFGraph.cpp
@@ -8,8 +8,6 @@
 //
 // Target-independent, SSA-based data flow graph for register data flow (RDF).
 //
-#include "RDFGraph.h"
-#include "RDFRegisters.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -20,6 +18,8 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -753,8 +753,10 @@
   const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
   if (RegisterId R = TLI.getExceptionPointerRegister(PF))
     LR.insert(RegisterRef(R));
-  if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
-    LR.insert(RegisterRef(R));
+  if (!isFuncletEHPersonality(classifyEHPersonality(PF))) {
+    if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
+      LR.insert(RegisterRef(R));
+  }
   return LR;
 }
Index: llvm/lib/CodeGen/RDFLiveness.cpp
===================================================================
--- llvm/lib/CodeGen/RDFLiveness.cpp
+++ llvm/lib/CodeGen/RDFLiveness.cpp
@@ -22,9 +22,6 @@
 //   and Embedded Architectures and Compilers", 8 (4),
 //   <10.1145/2086696.2086706>.
 //
-#include "RDFLiveness.h"
-#include "RDFGraph.h"
-#include "RDFRegisters.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -33,6 +30,9 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
Index: llvm/lib/CodeGen/RDFRegisters.cpp
===================================================================
--- llvm/lib/CodeGen/RDFRegisters.cpp
+++ llvm/lib/CodeGen/RDFRegisters.cpp
@@ -6,11 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "RDFRegisters.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
Index: llvm/lib/Target/Hexagon/CMakeLists.txt
===================================================================
--- llvm/lib/Target/Hexagon/CMakeLists.txt
+++ llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -64,9 +64,6 @@
   HexagonVLIWPacketizer.cpp
   RDFCopy.cpp
   RDFDeadCode.cpp
-  RDFGraph.cpp
-  RDFLiveness.cpp
-  RDFRegisters.cpp
   )
 
 add_subdirectory(AsmParser)
Index: llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
===================================================================
--- llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -12,9 +12,6 @@
 #include "HexagonInstrInfo.h"
 #include "HexagonSubtarget.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringRef.h"
@@ -27,6 +24,9 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/MCInstrDesc.h"
Index: llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
===================================================================
--- llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -11,9 +11,6 @@
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "RDFCopy.h"
 #include "RDFDeadCode.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -24,6 +21,9 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
Index: llvm/lib/Target/Hexagon/RDFCopy.h
===================================================================
--- llvm/lib/Target/Hexagon/RDFCopy.h
+++ llvm/lib/Target/Hexagon/RDFCopy.h
@@ -9,9 +9,9 @@
 #ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
 #define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
 
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include <map>
 #include <vector>
Index: llvm/lib/Target/Hexagon/RDFCopy.cpp
===================================================================
--- llvm/lib/Target/Hexagon/RDFCopy.cpp
+++ llvm/lib/Target/Hexagon/RDFCopy.cpp
@@ -11,13 +11,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "RDFCopy.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
Index: llvm/lib/Target/Hexagon/RDFDeadCode.h
===================================================================
--- llvm/lib/Target/Hexagon/RDFDeadCode.h
+++ llvm/lib/Target/Hexagon/RDFDeadCode.h
@@ -23,8 +23,8 @@
 #ifndef RDF_DEADCODE_H
 #define RDF_DEADCODE_H
 
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
 #include "llvm/ADT/SetVector.h"
 
 namespace llvm {
Index: llvm/lib/Target/Hexagon/RDFDeadCode.cpp
===================================================================
--- llvm/lib/Target/Hexagon/RDFDeadCode.cpp
+++ llvm/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -9,13 +9,13 @@
 // RDF-based generic dead code elimination.
 
 #include "RDFDeadCode.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
 #include "llvm/Support/Debug.h"
 #include <queue>
Index: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
===================================================================
--- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3152,6 +3152,64 @@
 void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
                                    MCStreamer &Out) {
   Out.emitInstruction(Inst, getSTI());
+
+  if (getSTI().getFeatureBits()[X86::FeatureLVILoadHardening]) {
+    auto Flags = Inst.getFlags();
+    if ((Flags & X86::REP_PREFIX) || (Flags & X86::REPNE_PREFIX)) {
+      switch (Inst.getOpcode()) {
+      case X86::CMPSB:
+      case X86::CMPSW:
+      case X86::CMPSL:
+      case X86::CMPSQ:
+      case X86::SCASB:
+      case X86::SCASW:
+      case X86::SCASL:
+      case X86::SCASQ:
+        Warning(Inst.getLoc(), "Instruction may be vulnerable to LVI and "
+                               "requires manual mitigation");
+        return;
+      }
+    }
+  }
+
+  if (getSTI().getFeatureBits()[X86::FeatureLVIControlFlowIntegrity]) {
+    switch (Inst.getOpcode()) {
+    case X86::RET:
+    case X86::RETL:
+    case X86::RETQ:
+    case X86::RETIL:
+    case X86::RETIQ:
+    case X86::RETIW:
+    case X86::JMP16m:
+    case X86::JMP32m:
+    case X86::JMP64m:
+    case X86::JMP64m_REX:
+    case X86::FARJMP16m:
+    case X86::FARJMP32m:
+    case X86::FARJMP64:
+    case X86::CALL16m:
+    case X86::CALL32m:
+    case X86::CALL64m:
+    case X86::FARCALL16m:
+    case X86::FARCALL32m:
+    case X86::FARCALL64:
+      Warning(Inst.getLoc(), "Instruction may be vulnerable to LVI and "
+                             "requires manual mitigation");
+      return;
+    }
+  }
+
+  // If this instruction loads and we're hardening for LVI, emit an LFENCE.
+  if (getSTI().getFeatureBits()[X86::FeatureLVILoadHardening]) {
+    const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+    // LFENCE has the mayLoad property, don't double fence.
+    if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
+      MCInst FenceInst;
+      FenceInst.setOpcode(X86::LFENCE);
+      FenceInst.setLoc(Inst.getLoc());
+      Out.emitInstruction(FenceInst, getSTI());
+    }
+  }
 }
 
 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
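The assembler-level hardening above follows one rule: after any instruction that may load, emit an LFENCE, except after LFENCE itself (which carries the mayLoad flag in the .td files, hence the explicit opcode check). A toy model of that rule over a fake instruction stream, with illustrative names that are not part of the patch:

#include <cstdio>
#include <string>
#include <vector>

struct ToyInst {
  std::string Text;
  bool MayLoad;
};

int main() {
  std::vector<ToyInst> Stream = {{"movq (%rsi), %rax", true},
                                 {"addq $1, %rax", false},
                                 {"lfence", true}}; // mayLoad, but is a fence
  for (const ToyInst &I : Stream) {
    std::printf("\t%s\n", I.Text.c_str());
    if (I.MayLoad && I.Text != "lfence") // never double-fence
      std::printf("\tlfence\n");
  }
}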
Index: llvm/lib/Target/X86/CMakeLists.txt
===================================================================
--- llvm/lib/Target/X86/CMakeLists.txt
+++ llvm/lib/Target/X86/CMakeLists.txt
@@ -51,6 +51,9 @@
   X86InstrInfo.cpp
   X86EvexToVex.cpp
   X86LegalizerInfo.cpp
+  X86LoadValueInjectionIndirectThunks.cpp
+  X86LoadValueInjectionLoadHardening.cpp
+  X86LoadValueInjectionRetHardening.cpp
   X86MCInstLower.cpp
   X86MachineFunctionInfo.cpp
   X86MacroFusion.cpp
Index: llvm/lib/Target/X86/ImmutableGraph.h
===================================================================
--- /dev/null
+++ llvm/lib/Target/X86/ImmutableGraph.h
@@ -0,0 +1,415 @@
+//==========-- ImmutableGraph.h - A fast DAG implementation ---------=========//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IMMUTABLEGRAPH_H
+#define IMMUTABLEGRAPH_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <iterator>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+template <typename _NodeValueT, typename _EdgeValueT, typename _SizeT = int>
+class ImmutableGraph {
+  using Traits = GraphTraits<ImmutableGraph<_NodeValueT, _EdgeValueT> *>;
+  template <typename> friend class ImmutableGraphBuilder;
+
+public:
+  using NodeValueT = _NodeValueT;
+  using EdgeValueT = _EdgeValueT;
+  using size_type = _SizeT;
+  class Node;
+  class Edge {
+    friend class ImmutableGraph;
+    template <typename> friend class ImmutableGraphBuilder;
+    friend Traits;
+
+    Node *__dest;
+    EdgeValueT __value;
+
+  public:
+    EdgeValueT &value() { return __value; }
+  };
+  class Node {
+    friend class ImmutableGraph;
+    template <typename> friend class ImmutableGraphBuilder;
+    friend Traits;
+
+    Edge *__edges;
+    NodeValueT __value;
+
+  public:
+    NodeValueT &value() { return __value; }
+  };
+
+protected:
+  ImmutableGraph(Node *Nodes, size_type NodesSize, Edge *Edges,
+                 size_type EdgesSize)
+      : __nodes{Nodes}, __nodes_size{NodesSize}, __edges{Edges},
+        __edges_size{EdgesSize} {}
+  ImmutableGraph(const ImmutableGraph &) = delete;
+  ImmutableGraph(ImmutableGraph &&) = delete;
+  ImmutableGraph &operator=(const ImmutableGraph &) = delete;
+  ImmutableGraph &operator=(ImmutableGraph &&) = delete;
+
+public:
+  ~ImmutableGraph() {
+    delete[] __edges;
+    delete[] __nodes;
+  }
+
+  Node *nodes_begin() const { return __nodes; }
+  Node *nodes_end() const { return __nodes + __nodes_size; }
+  Edge *edges_begin() const { return __edges; }
+  Edge *edges_end() const { return __edges + __edges_size; }
+  size_type nodes_size() const { return __nodes_size; }
+  size_type edges_size() const { return __edges_size; }
+  bool empty() const { return __nodes_size == 0; }
+
+  class NodeSet {
+    friend class iterator;
+
+    const ImmutableGraph &__g;
+    BitVector __v;
+
+  public:
+    NodeSet(const ImmutableGraph &G, bool ContainsAll = false)
+        : __g{G}, __v{static_cast<unsigned>(__g.nodes_size()), ContainsAll} {}
+    bool insert(Node *N) {
+      size_type Idx = std::distance(__g.nodes_begin(), N);
+      bool AlreadyExists = __v.test(Idx);
+      __v.set(Idx);
+      return !AlreadyExists;
+    }
+    void erase(Node *N) {
+      size_type Idx = std::distance(__g.nodes_begin(), N);
+      __v.reset(Idx);
+    }
+    bool contains(Node *N) const {
+      size_type Idx = std::distance(__g.nodes_begin(), N);
+      return __v.test(Idx);
+    }
+    void clear() { __v.reset(); }
+    bool empty() const { return __v.none(); }
+    /// Return the number of elements in the set
+    size_type count() const { return __v.count(); }
+    /// Return the size of the set's domain
+    size_type size() const { return __v.size(); }
+    /// Set union
+    NodeSet &operator|=(const NodeSet &RHS) {
+      assert(&this->__g == &RHS.__g);
+      __v |= RHS.__v;
+      return *this;
+    }
+    /// Set intersection
+    NodeSet &operator&=(const NodeSet &RHS) {
+      assert(&this->__g == &RHS.__g);
+      __v &= RHS.__v;
+      return *this;
+    }
+    /// Set disjoint union
+    NodeSet &operator^=(const NodeSet &RHS) {
+      assert(&this->__g == &RHS.__g);
+      __v ^= RHS.__v;
+      return *this;
+    }
+
+    using index_iterator = typename BitVector::const_set_bits_iterator;
+    index_iterator index_begin() const { return __v.set_bits_begin(); }
+    index_iterator index_end() const { return __v.set_bits_end(); }
+    void set(size_type Idx) { __v.set(Idx); }
+    void reset(size_type Idx) { __v.reset(Idx); }
+
+    class iterator {
+      const NodeSet &__set;
+      size_type __current;
+
+      void advance() {
+        assert(__current != -1);
+        __current = __set.__v.find_next(__current);
+      }
+
+    public:
+      iterator(const NodeSet &Set, size_type Begin)
+          : __set{Set}, __current{Begin} {}
+      iterator operator++(int) {
+        iterator Tmp = *this;
+        advance();
+        return Tmp;
+      }
+      iterator &operator++() {
+        advance();
+        return *this;
+      }
+      Node *operator*() const {
+        assert(__current != -1);
+        return __set.__g.nodes_begin() + __current;
+      }
+      bool operator==(const iterator &other) const {
+        assert(&this->__set == &other.__set);
+        return this->__current == other.__current;
+      }
+      bool operator!=(const iterator &other) const { return !(*this == other); }
+    };
+
+    iterator begin() const { return iterator{*this, __v.find_first()}; }
+    iterator end() const { return iterator{*this, -1}; }
+  };
+
+  class EdgeSet {
+    const ImmutableGraph &__g;
+    BitVector __v;
+
+  public:
+    EdgeSet(const ImmutableGraph &G, bool ContainsAll = false)
+        : __g{G}, __v{static_cast<unsigned>(__g.edges_size()), ContainsAll} {}
+    bool insert(Edge *E) {
+      size_type Idx = std::distance(__g.edges_begin(), E);
+      bool AlreadyExists = __v.test(Idx);
+      __v.set(Idx);
+      return !AlreadyExists;
+    }
+    void erase(Edge *E) {
+      size_type Idx = std::distance(__g.edges_begin(), E);
+      __v.reset(Idx);
+    }
+    bool contains(Edge *E) const {
+      size_type Idx = std::distance(__g.edges_begin(), E);
+      return __v.test(Idx);
+    }
+    void clear() { __v.reset(); }
+    bool empty() const { return __v.none(); }
+    /// Return the number of elements in the set
+    size_type count() const { return __v.count(); }
+    /// Return the size of the set's domain
+    size_type size() const { return __v.size(); }
+    /// Set union
+    EdgeSet &operator|=(const EdgeSet &RHS) {
+      assert(&this->__g == &RHS.__g);
+      __v |= RHS.__v;
+      return *this;
+    }
+    /// Set intersection
+    EdgeSet &operator&=(const EdgeSet &RHS) {
+      assert(&this->__g == &RHS.__g);
+      __v &= RHS.__v;
+      return *this;
+    }
+    /// Set disjoint union
+    EdgeSet &operator^=(const EdgeSet &RHS) {
+      assert(&this->__g == &RHS.__g);
+      __v ^= RHS.__v;
+      return *this;
+    }
+
+    using index_iterator = typename BitVector::const_set_bits_iterator;
+    index_iterator index_begin() const { return __v.set_bits_begin(); }
+    index_iterator index_end() const { return __v.set_bits_end(); }
+    void set(size_type Idx) { __v.set(Idx); }
+    void reset(size_type Idx) { __v.reset(Idx); }
+
+    class iterator {
+      const EdgeSet &__set;
+      size_type __current;
+
+      void advance() {
+        assert(__current != -1);
+        __current = __set.__v.find_next(__current);
+      }
+
+    public:
+      iterator(const EdgeSet &Set, size_type Begin)
+          : __set{Set}, __current{Begin} {}
+      iterator operator++(int) {
+        iterator Tmp = *this;
+        advance();
+        return Tmp;
+      }
+      iterator &operator++() {
+        advance();
+        return *this;
+      }
+      Edge *operator*() const {
+        assert(__current != -1);
+        return __set.__g.edges_begin() + __current;
+      }
+      bool operator==(const iterator &other) const {
+        assert(&this->__set == &other.__set);
+        return this->__current == other.__current;
+      }
+      bool operator!=(const iterator &other) const { return !(*this == other); }
+    };
+
+    iterator begin() const { return iterator{*this, __v.find_first()}; }
+    iterator end() const { return iterator{*this, -1}; }
+  };
+
+private:
+  Node *__nodes;
+  size_type __nodes_size;
+  Edge *__edges;
+  size_type __edges_size;
+};
+
+template <typename GraphT> class ImmutableGraphBuilder {
+  using NodeValueT = typename GraphT::NodeValueT;
+  using EdgeValueT = typename GraphT::EdgeValueT;
+  static_assert(
+      std::is_base_of<ImmutableGraph<NodeValueT, EdgeValueT>, GraphT>::value,
+      "Template argument to ImmutableGraphBuilder must derive from "
+      "ImmutableGraph<>");
+  using size_type = typename GraphT::size_type;
+  using NodeSet = typename GraphT::NodeSet;
+  using Node = typename GraphT::Node;
+  using EdgeSet = typename GraphT::EdgeSet;
+  using Edge = typename GraphT::Edge;
+  using BuilderEdge = std::pair<EdgeValueT, size_type>;
+  using EdgeList = std::vector<BuilderEdge>;
+  using BuilderVertex = std::pair<NodeValueT, EdgeList>;
+  using VertexVec = std::vector<BuilderVertex>;
+
+public:
+  using NodeRef = size_type;
+
+  NodeRef addVertex(const NodeValueT &V) {
+    auto I = __adj_list.emplace(__adj_list.end(), V, EdgeList{});
+    return std::distance(__adj_list.begin(), I);
+  }
+
+  void addEdge(const EdgeValueT &E, NodeRef From, NodeRef To) {
+    __adj_list[From].second.emplace_back(E, To);
+  }
+
+  bool empty() const { return __adj_list.empty(); }
+
+  template <typename... ArgT> GraphT *get(ArgT &&... Args) {
+    size_type VertexSize = __adj_list.size(), EdgeSize = 0;
+    for (const auto &V : __adj_list) {
+      EdgeSize += V.second.size();
+    }
+    auto *VertexArray = new Node[VertexSize + 1 /* terminator node */];
+    auto *EdgeArray = new Edge[EdgeSize];
+    size_type VI = 0, EI = 0;
+    for (; VI < static_cast<size_type>(__adj_list.size()); ++VI) {
+      VertexArray[VI].__value = std::move(__adj_list[VI].first);
+      VertexArray[VI].__edges = &EdgeArray[EI];
+      auto NumEdges = static_cast<size_type>(__adj_list[VI].second.size());
+      if (NumEdges > 0) {
+        for (size_type VEI = 0; VEI < NumEdges; ++VEI, ++EI) {
+          auto &E = __adj_list[VI].second[VEI];
+          EdgeArray[EI].__value = std::move(E.first);
+          EdgeArray[EI].__dest = VertexArray + E.second;
+        }
+      }
+    }
+    assert(VI == VertexSize && EI == EdgeSize && "Gadget graph malformed");
+    VertexArray[VI].__edges = EdgeArray + EdgeSize; // terminator node
+    return new GraphT{VertexArray, VertexSize, EdgeArray, EdgeSize,
+                      std::forward<ArgT>(Args)...};
+  }
+
+  template <typename... ArgT>
+  static GraphT *trim(const GraphT &G, const NodeSet &TrimNodes,
+                      const EdgeSet &TrimEdges, ArgT &&... Args) {
+    size_type NewVertexSize = TrimNodes.size() - TrimNodes.count();
+    size_type NewEdgeSize = TrimEdges.size() - TrimEdges.count();
+    auto *NewVertexArray = new Node[NewVertexSize + 1 /* terminator node */];
+    auto *NewEdgeArray = new Edge[NewEdgeSize];
+    size_type TrimmedNodesSoFar = 0,
+              *TrimmedNodes = new size_type[TrimNodes.size()];
+    for (size_type I = 0; I < TrimNodes.size(); ++I) {
+      TrimmedNodes[I] = TrimmedNodesSoFar;
+      if (TrimNodes.contains(G.nodes_begin() + I))
+        ++TrimmedNodesSoFar;
+    }
+    size_type VertexI = 0, EdgeI = 0;
+    for (Node *NI = G.nodes_begin(), *NE = G.nodes_end(); NI != NE; ++NI) {
+      if (TrimNodes.contains(NI))
+        continue;
+      size_type NewNumEdges =
+          static_cast<size_type>((NI + 1)->__edges - NI->__edges) > 0
+              ? std::count_if(
+                    NI->__edges, (NI + 1)->__edges,
+                    [&TrimEdges](Edge &E) { return !TrimEdges.contains(&E); })
+              : 0;
+      NewVertexArray[VertexI].__value = NI->__value;
+      NewVertexArray[VertexI].__edges = &NewEdgeArray[EdgeI];
+      if (NewNumEdges > 0) {
+        for (Edge *EI = NI->__edges, *EE = (NI + 1)->__edges; EI != EE; ++EI) {
+          if (TrimEdges.contains(EI))
+            continue;
+          NewEdgeArray[EdgeI].__value = EI->__value;
+          size_type DestIdx = std::distance(G.nodes_begin(), EI->__dest);
+          size_type NewIdx = DestIdx - TrimmedNodes[DestIdx];
+          assert(NewIdx < NewVertexSize);
+          NewEdgeArray[EdgeI].__dest = NewVertexArray + NewIdx;
+          ++EdgeI;
+        }
+      }
+      ++VertexI;
+    }
+    delete[] TrimmedNodes;
+    assert(VertexI == NewVertexSize && EdgeI == NewEdgeSize &&
+           "Gadget graph malformed");
+    NewVertexArray[VertexI].__edges = NewEdgeArray + NewEdgeSize;
+    return new GraphT{NewVertexArray, NewVertexSize, NewEdgeArray, NewEdgeSize,
+                      std::forward<ArgT>(Args)...};
+  }
+
+private:
+  VertexVec __adj_list;
+};
+
+template <typename NodeValueT, typename EdgeValueT, typename SizeT>
+struct GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT, SizeT> *> {
+  using GraphT = ImmutableGraph<NodeValueT, EdgeValueT, SizeT>;
+  using NodeRef = typename GraphT::Node *;
+  using EdgeRef = typename GraphT::Edge &;
+
+  static NodeRef edge_dest(EdgeRef E) { return E.__dest; }
+  using ChildIteratorType =
+      mapped_iterator<typename GraphT::Edge *, decltype(&edge_dest)>;
+
+  static NodeRef getEntryNode(GraphT *G) { return G->nodes_begin(); }
+  static ChildIteratorType child_begin(NodeRef N) {
+    return {N->__edges, &edge_dest};
+  }
+  static ChildIteratorType child_end(NodeRef N) {
+    return {(N + 1)->__edges, &edge_dest};
+  }
+
+  static NodeRef getNode(typename GraphT::Node &N) { return NodeRef{&N}; }
+  using nodes_iterator =
+      mapped_iterator<typename GraphT::Node *, decltype(&getNode)>;
+  static nodes_iterator nodes_begin(GraphT *G) {
+    return {G->nodes_begin(), &getNode};
+  }
+  static nodes_iterator nodes_end(GraphT *G) {
+    return {G->nodes_end(), &getNode};
+  }
+
+  using ChildEdgeIteratorType = typename GraphT::Edge *;
+
+  static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+    return N->__edges;
+  }
+  static ChildEdgeIteratorType child_edge_end(NodeRef N) {
+    return (N + 1)->__edges;
+  }
+  static typename GraphT::size_type size(GraphT *G) { return G->nodes_size(); }
+};
+
+} // end namespace llvm
+
+#endif // IMMUTABLEGRAPH_H
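A minimal usage sketch for the header above (illustrative; ToyGraph and toyGraphDemo are not part of the patch). It shows the derived-class/builder pattern that MachineGadgetGraph uses later in this patch, plus the BitVector-backed NodeSet operations whose constant-time behavior the load-hardening pass relies on:

#include "ImmutableGraph.h"
#include <memory>

namespace {
struct ToyGraph : llvm::ImmutableGraph<int, int> {
  using GraphT = llvm::ImmutableGraph<int, int>;
  // Re-expose the protected base constructor so the builder can call it.
  ToyGraph(Node *Nodes, size_type NodesSize, Edge *Edges, size_type EdgesSize)
      : GraphT{Nodes, NodesSize, Edges, EdgesSize} {}
};
} // end anonymous namespace

void toyGraphDemo() {
  llvm::ImmutableGraphBuilder<ToyGraph> Builder;
  auto A = Builder.addVertex(1); // node values
  auto B = Builder.addVertex(2);
  Builder.addEdge(/*EdgeValue=*/7, A, B);
  std::unique_ptr<ToyGraph> G{Builder.get()}; // freeze into the flat arrays

  // Sets over nodes are single bits indexed by array position.
  ToyGraph::NodeSet Reached{*G};
  Reached.insert(G->nodes_begin());          // one load + one store
  (void)Reached.contains(G->nodes_begin()); // one load
}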
Index: llvm/lib/Target/X86/X86.h
===================================================================
--- llvm/lib/Target/X86/X86.h
+++ llvm/lib/Target/X86/X86.h
@@ -137,6 +137,9 @@
                                       X86Subtarget &,
                                       X86RegisterBankInfo &);
 
+FunctionPass *createX86LoadValueInjectionIndirectThunksPass();
+FunctionPass *createX86LoadValueInjectionLoadHardeningPass();
+FunctionPass *createX86LoadValueInjectionRetHardeningPass();
 FunctionPass *createX86SpeculativeLoadHardeningPass();
 
 void initializeEvexToVexInstPassPass(PassRegistry &);
@@ -152,6 +155,9 @@
 void initializeX86ExecutionDomainFixPass(PassRegistry &);
 void initializeX86ExpandPseudoPass(PassRegistry &);
 void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionIndirectThunksPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &);
 void initializeX86OptimizeLEAPassPass(PassRegistry &);
 void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
Index: llvm/lib/Target/X86/X86.td
===================================================================
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -426,6 +426,22 @@
           "ourselves. Only has effect when combined with some other retpoline "
           "feature", [FeatureRetpolineIndirectCalls]>;
 
+// Mitigate LVI attacks against indirect calls/branches and call returns
+def FeatureLVIControlFlowIntegrity
+    : SubtargetFeature<
+          "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
+          "Prevent indirect calls/branches from using a memory operand, and "
+          "precede all indirect calls/branches from a register with an "
+          "LFENCE instruction to serialize control flow. Also decompose RET "
+          "instructions into a POP+LFENCE+JMP sequence.">;
+
+// Mitigate LVI attacks against data loads
+def FeatureLVILoadHardening
+    : SubtargetFeature<
+          "lvi-load-hardening", "UseLVILoadHardening", "true",
+          "Insert LFENCE instructions to prevent data speculatively injected "
+          "into loads from being used maliciously.">;
+
 // Direct Move instructions.
 def FeatureMOVDIRI  : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                                        "Support movdiri instruction">;
Index: llvm/lib/Target/X86/X86FastISel.cpp
===================================================================
--- llvm/lib/Target/X86/X86FastISel.cpp
+++ llvm/lib/Target/X86/X86FastISel.cpp
@@ -3208,7 +3208,8 @@
     return false;
 
   // Functions using retpoline for indirect calls need to use SDISel.
-  if (Subtarget->useRetpolineIndirectCalls())
+  if (Subtarget->useRetpolineIndirectCalls() ||
+      Subtarget->useLVIControlFlowIntegrity())
     return false;
 
   // Handle only C, fastcc, and webkit_js calling conventions for now.
Index: llvm/lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86FrameLowering.cpp
+++ llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -962,7 +962,8 @@
   bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
 
   // FIXME: Add retpoline support and remove this.
-  if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
+  if (Is64Bit && IsLargeCodeModel && (STI.useRetpolineIndirectCalls() ||
+                                      STI.useLVIControlFlowIntegrity()))
     report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                        "code model and retpoline not yet implemented.");
 
@@ -2703,7 +2704,7 @@
   // is laid out within 2^31 bytes of each function body, but this seems
   // to be sufficient for JIT.
   // FIXME: Add retpoline support and remove the error here..
-  if (STI.useRetpolineIndirectCalls())
+  if (STI.useRetpolineIndirectCalls() || STI.useLVIControlFlowIntegrity())
     report_fatal_error("Emitting morestack calls on 64-bit with the large "
                        "code model and retpoline not yet implemented.");
   BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1023,7 +1023,8 @@
     if (OptLevel != CodeGenOpt::None &&
         // Only do this when the target can fold the load into the call or
         // jmp.
-        !Subtarget->useRetpolineIndirectCalls() &&
+        !(Subtarget->useRetpolineIndirectCalls() ||
+          Subtarget->useLVIControlFlowIntegrity()) &&
         ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
          (N->getOpcode() == X86ISD::TC_RETURN &&
           (Subtarget->is64Bit() ||
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30555,7 +30555,8 @@
 
 bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
   // If the subtarget is using retpolines, we need to not generate jump tables.
-  if (Subtarget.useRetpolineIndirectBranches())
+  if (Subtarget.useRetpolineIndirectBranches() ||
+      Subtarget.useLVIControlFlowIntegrity())
     return false;
 
   // Otherwise, fallback on the generic logic.
@@ -31758,22 +31759,26 @@
   return BB;
 }
 
-static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
+static unsigned getOpcodeForThunk(unsigned RPOpc) {
   switch (RPOpc) {
   case X86::RETPOLINE_CALL32:
     return X86::CALLpcrel32;
   case X86::RETPOLINE_CALL64:
     return X86::CALL64pcrel32;
+  case X86::LVI_THUNK_CALL64:
+    return X86::CALL64pcrel32;
   case X86::RETPOLINE_TCRETURN32:
     return X86::TCRETURNdi;
   case X86::RETPOLINE_TCRETURN64:
     return X86::TCRETURNdi64;
+  case X86::LVI_THUNK_TCRETURN64:
+    return X86::TCRETURNdi64;
   }
   llvm_unreachable("not retpoline opcode");
 }
 
-static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
-                                      unsigned Reg) {
+static const char *getThunkSymbol(const X86Subtarget &Subtarget,
+                                  unsigned Reg) {
   if (Subtarget.useRetpolineExternalThunk()) {
     // When using an external thunk for retpolines, we pick names that match the
     // names GCC happens to use as well. This helps simplify the implementation
@@ -31808,6 +31813,12 @@
     llvm_unreachable("unexpected reg for retpoline");
   }
 
+  if (Subtarget.useLVIControlFlowIntegrity()) {
+    assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
+    assert(Reg == X86::R11 && "Invalid register for LVI CFI");
+    return "__x86_indirect_thunk_r11";
+  }
+
   // When targeting an internal COMDAT thunk use an LLVM-specific name.
   switch (Reg) {
   case X86::EAX:
@@ -31837,7 +31848,7 @@
   DebugLoc DL = MI.getDebugLoc();
   const X86InstrInfo *TII = Subtarget.getInstrInfo();
   Register CalleeVReg = MI.getOperand(0).getReg();
-  unsigned Opc = getOpcodeForRetpoline(MI.getOpcode());
+  unsigned Opc = getOpcodeForThunk(MI.getOpcode());
 
   // Find an available scratch register to hold the callee. On 64-bit, we can
   // just use R11, but we scan for uses anyway to ensure we don't generate
@@ -31871,7 +31882,7 @@
     report_fatal_error("calling convention incompatible with retpoline, no "
                        "available registers");
 
-  const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
+  const char *Symbol = getThunkSymbol(Subtarget, AvailableReg);
 
   BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
       .addReg(CalleeVReg);
@@ -32649,8 +32660,10 @@
     return EmitLoweredTLSAddr(MI, BB);
   case X86::RETPOLINE_CALL32:
   case X86::RETPOLINE_CALL64:
+  case X86::LVI_THUNK_CALL64:
   case X86::RETPOLINE_TCRETURN32:
   case X86::RETPOLINE_TCRETURN64:
+  case X86::LVI_THUNK_TCRETURN64:
     return EmitLoweredRetpoline(MI, BB);
   case X86::CATCHRET:
     return EmitLoweredCatchRet(MI, BB);
Index: llvm/lib/Target/X86/X86InstrCompiler.td
===================================================================
--- llvm/lib/Target/X86/X86InstrCompiler.td
+++ llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1199,13 +1199,19 @@
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
           (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
-          Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
+          Requires<[In64BitMode, NotUseRetpolineIndirectCalls,
+                    NotUseLVIIndirectThunks]>;
 
 // Don't fold loads into X86tcret requiring more than 6 regs.
 // There wouldn't be enough scratch registers for base+index.
 def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
           (TCRETURNmi64 addr:$dst, imm:$off)>,
-          Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
+          Requires<[In64BitMode, NotUseRetpolineIndirectCalls,
+                    NotUseLVIIndirectThunks]>;
+
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+          (LVI_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
+          Requires<[In64BitMode, UseLVIIndirectThunks]>;
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
           (RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
Index: llvm/lib/Target/X86/X86InstrControl.td
===================================================================
--- llvm/lib/Target/X86/X86InstrControl.td
+++ llvm/lib/Target/X86/X86InstrControl.td
@@ -334,11 +334,13 @@
                         Requires<[In64BitMode]>;
   def CALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
                         "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
-                      Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>;
+                      Requires<[In64BitMode,NotUseRetpolineIndirectCalls,
+                                NotUseLVIIndirectThunks]>;
   def CALL64m       : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
                         "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
                       Requires<[In64BitMode,FavorMemIndirectCall,
-                                NotUseRetpolineIndirectCalls]>;
+                                NotUseRetpolineIndirectCalls,
+                                NotUseLVIIndirectThunks]>;
 
   // Non-tracking calls for IBT, use with caution.
   let isCodeGenOnly = 1 in {
@@ -400,6 +402,9 @@
   def RETPOLINE_CALL64 :
     PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
             Requires<[In64BitMode,UseRetpolineIndirectCalls]>;
+  def LVI_THUNK_CALL64 :
+    PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
+            Requires<[In64BitMode,UseLVIIndirectThunks]>;
 
   // Retpoline variant of indirect tail calls.
   let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
@@ -408,6 +413,12 @@
     def RETPOLINE_TCRETURN32 :
       PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
   }
+
+  // LVI thunk variant of indirect tail calls.
+  let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+    def LVI_THUNK_TCRETURN64 :
+      PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
+  }
 }
 
 // Conditional tail calls are similar to the above, but they are branches
Index: llvm/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -1005,6 +1005,8 @@
 def HasMFence    : Predicate<"Subtarget->hasMFence()">;
 def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">;
 def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">;
+def UseLVIIndirectThunks : Predicate<"Subtarget->useLVIControlFlowIntegrity()">;
+def NotUseLVIIndirectThunks : Predicate<"!Subtarget->useLVIControlFlowIntegrity()">;
 
 //===----------------------------------------------------------------------===//
 // X86 Instruction Format Definitions.
Index: llvm/lib/Target/X86/X86LoadValueInjectionIndirectThunks.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/X86/X86LoadValueInjectionIndirectThunks.cpp
@@ -0,0 +1,196 @@
+//=- X86LoadValueInjectionIndirectThunks.cpp - Construct LVI thunks for x86 -=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: This pass replaces each indirect call/jump with a direct call
+/// to a thunk that looks like:
+/// ```
+/// lfence
+/// jmpq *%r11
+/// ```
+/// This ensures that if the value in register %r11 was loaded from memory,
+/// then the value in %r11 is (architecturally) correct prior to the jump.
+///
+/// Note: A lot of this code was lifted from X86RetpolineThunks.cpp.
+///
+//===----------------------------------------------------------------------===//
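To make the thunk transformation concrete, here is an illustrative snippet (not from the patch; register choices in the comments are a sketch of typical codegen) showing what LVI-CFI does to an ordinary indirect call:

using Callback = int (*)(int);

int invoke(Callback CB, int V) {
  return CB(V); // indirect call
  // Without LVI-CFI, this may lower to:   callq  *%rax
  // With LVI-CFI (sketch):                movq   %rax, %r11
  //                                       callq  __x86_indirect_thunk_r11
  // where the thunk body emitted by this pass is:
  //   __x86_indirect_thunk_r11:
  //     lfence
  //     jmpq *%r11
}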
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define PASS_KEY "x86-lvi-thunks"
+#define DEBUG_TYPE PASS_KEY
+
+static const char R11ThunkName[] = "__x86_indirect_thunk_r11";
+
+namespace {
+class X86LoadValueInjectionIndirectThunksPass : public MachineFunctionPass {
+public:
+  X86LoadValueInjectionIndirectThunksPass() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override {
+    return "X86 Load Value Injection (LVI) Indirect Thunks Pass";
+  }
+  bool doInitialization(Module &M) override;
+  bool runOnMachineFunction(MachineFunction &F) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineModuleInfoWrapperPass>();
+    AU.addPreserved<MachineModuleInfoWrapperPass>();
+  }
+
+  static char ID;
+
+private:
+  MachineModuleInfo *MMI;
+  const TargetMachine *TM;
+  const X86Subtarget *STI;
+  const X86InstrInfo *TII;
+
+  bool InsertedThunks;
+
+  void createThunkFunction(Module &M, StringRef Name);
+  void populateThunk(MachineFunction &MF, unsigned Reg);
+};
+
+} // end anonymous namespace
+
+char X86LoadValueInjectionIndirectThunksPass::ID = 0;
+
+bool X86LoadValueInjectionIndirectThunksPass::doInitialization(Module &M) {
+  InsertedThunks = false;
+  return false;
+}
+
+bool X86LoadValueInjectionIndirectThunksPass::runOnMachineFunction(
+    MachineFunction &MF) {
+  STI = &MF.getSubtarget<X86Subtarget>();
+  if (!(STI->hasSSE2() || STI->is64Bit())) {
+    // FIXME: support 32-bit
+    return false;
+  }
+
+  // Don't skip functions with the "optnone" attr but participate in opt-bisect.
+  const Function &F = MF.getFunction();
+  if (!F.hasOptNone() && skipFunction(F)) {
+    return false;
+  }
+
+  LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+                    << " *****\n");
+  TM = &MF.getTarget();
+  TII = STI->getInstrInfo();
+  MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+  Module &M = const_cast<Module &>(*MMI->getModule());
+
+  // If this function is not a thunk, check to see if we need to insert
+  // a thunk.
+  if (MF.getName() != R11ThunkName) {
+    // If we've already inserted a thunk, nothing else to do.
+    if (InsertedThunks) {
+      return false;
+    }
+
+    // Only add a thunk if one of the functions has the LVI-CFI feature
+    // enabled in its subtarget
+    if (!STI->useLVIControlFlowIntegrity()) {
+      return false;
+    }
+
+    // Otherwise, we need to insert the thunk.
+    // WARNING: This is not really a well behaving thing to do in a function
+    // pass. We extract the module and insert a new function (and machine
+    // function) directly into the module.
+    LLVM_DEBUG(dbgs() << "Creating thunk procedure" << '\n');
+    createThunkFunction(M, R11ThunkName);
+    InsertedThunks = true;
+    return true;
+  }
+
+  assert(MF.getName() == "__x86_indirect_thunk_r11" &&
+         "Should only have an r11 thunk on 64-bit targets");
+  LLVM_DEBUG(dbgs() << "Populating thunk" << '\n');
+  populateThunk(MF, X86::R11);
+  return true;
+}
+
+void X86LoadValueInjectionIndirectThunksPass::createThunkFunction(
+    Module &M, StringRef Name) {
+  assert(Name == R11ThunkName && "Created a thunk with an unexpected prefix!");
+
+  LLVMContext &Ctx = M.getContext();
+  auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
+  Function *F =
+      Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
+  F->setVisibility(GlobalValue::HiddenVisibility);
+  F->setComdat(M.getOrInsertComdat(Name));
+
+  // Add Attributes so that we don't create a frame, unwind information, or
+  // inline.
+  AttrBuilder B;
+  B.addAttribute(llvm::Attribute::NoUnwind);
+  B.addAttribute(llvm::Attribute::Naked);
+  F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+
+  // Populate our function a bit so that we can verify.
+  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
+  IRBuilder<> Builder(Entry);
+
+  Builder.CreateRetVoid();
+
+  // MachineFunctions/MachineBasicBlocks aren't created automatically for the
+  // IR-level constructs we already made. Create them and insert them into the
+  // module.
+  MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
+  MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
+
+  // Insert EntryMBB into MF. It's not in the module until we do this.
+  MF.insert(MF.end(), EntryMBB);
+}
+
+void X86LoadValueInjectionIndirectThunksPass::populateThunk(MachineFunction &MF,
+                                                            unsigned Reg) {
+  // Set MF properties. We never use vregs...
+  MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+
+  // Grab the entry MBB and erase any other blocks. O0 codegen appears to
+  // generate two bbs for the entry block.
+  MachineBasicBlock *Entry = &MF.front();
+  Entry->clear();
+  while (MF.size() > 1)
+    MF.erase(std::next(MF.begin()));
+
+  BuildMI(Entry, DebugLoc(), TII->get(X86::LFENCE));
+  BuildMI(Entry, DebugLoc(), TII->get(X86::JMP64r)).addReg(Reg);
+  Entry->addLiveIn(Reg);
+  return;
+}
+
+INITIALIZE_PASS_BEGIN(X86LoadValueInjectionIndirectThunksPass, PASS_KEY,
+                      "X86 LVI indirect thunk inserter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_END(X86LoadValueInjectionIndirectThunksPass, PASS_KEY,
+                    "X86 LVI indirect thunk inserter", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionIndirectThunksPass() {
+  return new X86LoadValueInjectionIndirectThunksPass();
+}
Index: llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -0,0 +1,874 @@
+//==-- X86LoadValueInjectionLoadHardening.cpp - LVI load hardening for x86 --=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: This pass finds Load Value Injection (LVI) gadgets consisting
+/// of a load from memory (i.e., SOURCE), and any operation that may transmit
+/// the value loaded from memory over a covert channel, or use the value loaded
+/// from memory to determine a branch/call target (i.e., SINK). After finding
+/// all such gadgets in a given function, the pass minimally inserts LFENCE
+/// instructions in such a manner that the following property is satisfied: for
+/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at
+/// least one LFENCE instruction. The algorithm that implements this minimal
+/// insertion is influenced by an academic paper that minimally inserts memory
+/// fences for high-performance concurrent programs:
+/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf
+/// The algorithm implemented in this pass is as follows:
+/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the
+/// following components:
+///    - SOURCE instructions (also includes function arguments)
+///    - SINK instructions
+///    - Basic block entry points
+///    - Basic block terminators
+///    - LFENCE instructions
+/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e.,
+/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have
+/// been mitigated, go to step 6.
+/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion.
+/// 4. Insert one LFENCE along each CFG edge that was cut in step 3.
+/// 5. Go to step 2.
+/// 6. If any LFENCEs were inserted, return `true` from runOnFunction() to tell
+/// LLVM that the function was modified.
+///
+/// For performance purposes, this pass uses a custom data structure to
+/// implement the GadgetGraph: ImmutableGraph. As the name suggests, an
+/// ImmutableGraph cannot be modified, except by creating a new ImmutableGraph.
+/// ImmutableGraph is implemented as two arrays: one containing nodes, and one
+/// containing edges. The advantages to this implementation are two-fold:
+/// 1. Iteration and traversal operations should experience terrific caching
+/// performance.
+/// 2. Set representations and operations on nodes and edges become
+/// extraordinarily efficient. For instance, a set of edges is implemented as
+/// an llvm::BitVector, wherein each bit corresponds to one edge in the edge
+/// array. This implies a lower bound of 64x spatial improvement over, e.g.,
+/// an llvm::DenseSet or llvm::SmallSet. It also means that
+/// insert/erase/contains operations complete in negligible constant time:
+/// insert and erase require one load and one store, and contains requires
+/// just one load.
+///
+//===----------------------------------------------------------------------===//
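For readers unfamiliar with LVI gadgets, the following hypothetical SOURCE-to-SINK pair illustrates what the description above is hunting for; the function and variable names are illustrative, not from the patch:

extern int Table[256];

int VictimLoad(int **PP) {
  int *P = *PP;     // SOURCE: a load whose result may be attacker-injected
                    // under LVI (e.g., via a faulting or assisted load).
  return Table[*P]; // SINK: the loaded value addresses a dependent load,
                    // forming a disclosure gadget.
}
// With -mlvi-hardening, an LFENCE is placed so that every CFG path from the
// SOURCE to the SINK crosses at least one fence, e.g. (sketch):
//   movq  (%rdi), %rax
//   lfence                ; inserted by this pass
//   movl  (%rax), %ecx    ; now consumes the architecturally correct value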
+
+#include "ImmutableGraph.h"
+#include "X86.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define PASS_KEY "x86-lvi-load"
+#define DEBUG_TYPE PASS_KEY
+
+STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
+STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
+STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
+                                 "were deployed");
+STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis");
+
+static cl::opt<std::string> OptimizePluginPath(
+    PASS_KEY "-opt-plugin",
+    cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden);
+
+static cl::opt<bool> NoConditionalBranches(
+    PASS_KEY "-no-cbranch",
+    cl::desc("Don't treat conditional branches as disclosure gadgets. This "
+             "may improve performance, at the cost of security."),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDot(
+    PASS_KEY "-dot",
+    cl::desc(
+        "For each function, emit a dot graph depicting potential LVI gadgets"),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDotOnly(
+    PASS_KEY "-dot-only",
+    cl::desc("For each function, emit a dot graph depicting potential LVI "
+             "gadgets, and do not insert any fences"),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDotVerify(
+    PASS_KEY "-dot-verify",
+    cl::desc("For each function, emit a dot graph to stdout depicting "
+             "potential LVI gadgets, used for testing purposes only"),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool> NoFixedLoads(
+    PASS_KEY "-no-fixed",
+    cl::desc("Don't mitigate RIP-relative or RSP-relative loads. This "
+             "may improve performance, at the cost of security."),
+    cl::init(false), cl::Hidden);
+
+static llvm::sys::DynamicLibrary OptimizeDL{};
+typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size,
+                            unsigned int *edges, int *edge_values,
+                            int *cut_edges /* out */, unsigned int edges_size);
+static OptimizeCutT OptimizeCut = nullptr;
+
+#define ARG_NODE nullptr
+#define GADGET_EDGE ((int)(-1))
+#define WEIGHT(EdgeValue) ((double)(2 * (EdgeValue) + 1))
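The OptimizeCutT typedef above defines the interface an external cut-optimization plugin must satisfy. A toy plugin matching that signature is sketched below; the exported symbol name and the return-value convention are assumptions (the loader code that would name the symbol falls outside this excerpt). This version cuts every CFG edge, which is trivially sound (every gadget path receives a fence) but far from minimal; a real plugin would solve a min-cut-style problem over the edge weights:

extern "C" int optimize_cut(unsigned int *Nodes, unsigned int NodesSize,
                            unsigned int *Edges, int *EdgeValues,
                            int *CutEdges /* out */, unsigned int EdgesSize) {
  (void)Nodes;
  (void)NodesSize;
  (void)Edges;
  for (unsigned int I = 0; I < EdgesSize; ++I)
    CutEdges[I] = EdgeValues[I] >= 0; // CFG edges carry non-negative weights;
                                      // gadget edges are GADGET_EDGE (-1).
  return 0; // assumed success code
}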
This " + "may improve performance, at the cost of security."), + cl::init(false), cl::Hidden); + +static llvm::sys::DynamicLibrary OptimizeDL{}; +typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size, + unsigned int *edges, int *edge_values, + int *cut_edges /* out */, unsigned int edges_size); +static OptimizeCutT OptimizeCut = nullptr; + +#define ARG_NODE nullptr +#define GADGET_EDGE ((int)(-1)) +#define WEIGHT(EdgeValue) ((double)(2 * (EdgeValue) + 1)) + +namespace { + +class X86LoadValueInjectionLoadHardeningPass : public MachineFunctionPass { +public: + X86LoadValueInjectionLoadHardeningPass() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "X86 Load Value Injection (LVI) Load Hardening Pass"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; + +private: + struct MachineGadgetGraph : ImmutableGraph { + using GraphT = ImmutableGraph; + using Node = typename GraphT::Node; + using Edge = typename GraphT::Edge; + using size_type = typename GraphT::size_type; + MachineGadgetGraph(Node *Nodes, size_type NodesSize, Edge *Edges, + size_type EdgesSize, int NumFences = 0, + int NumGadgets = 0) + : GraphT{Nodes, NodesSize, Edges, EdgesSize}, NumFences{NumFences}, + NumGadgets{NumGadgets} {} + MachineFunction &getMF() { // FIXME: This function should be cleaner + for (Node *NI = nodes_begin(), *const NE = nodes_end(); NI != NE; ++NI) { + if (NI->value()) { + return *NI->value()->getMF(); + } + } + llvm_unreachable("Could not find a valid node"); + } + static inline bool isCFGEdge(Edge &E) { return E.value() != GADGET_EDGE; } + static inline bool isGadgetEdge(Edge &E) { + return E.value() == GADGET_EDGE; + } + int NumFences; + int NumGadgets; + }; + friend struct llvm::DOTGraphTraits; + using GTraits = llvm::GraphTraits; + using GraphBuilder = ImmutableGraphBuilder; + using EdgeSet = MachineGadgetGraph::EdgeSet; + using Gadget = std::pair; + + const X86Subtarget *STI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + + int hardenLoads(MachineFunction &MF, bool Fixed) const; + std::unique_ptr + getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT, + const MachineDominanceFrontier &MDF, bool FixedLoads) const; + std::unique_ptr + elimEdges(std::unique_ptr Graph) const; + void cutEdges(MachineGadgetGraph &G, EdgeSet &CutEdges /* out */) const; + int insertFences(MachineGadgetGraph &G, + EdgeSet &CutEdges /* in, out */) const; + + bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const; + bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const; + template bool hasLoadFrom(const MachineInstr &MI) const; + bool instrAccessesStackSlot(const MachineInstr &MI) const; + bool instrAccessesConstantPool(const MachineInstr &MI) const; + bool instrAccessesGOT(const MachineInstr &MI) const; + inline bool instrIsFixedAccess(const MachineInstr &MI) const { + return instrAccessesConstantPool(MI) || instrAccessesStackSlot(MI) || + instrAccessesGOT(MI); + } + inline bool isFence(const MachineInstr *MI) const { + return MI && (MI->getOpcode() == X86::LFENCE || + (STI->useLVIControlFlowIntegrity() && MI->isCall())); + } +}; + +} // end anonymous namespace + +namespace llvm { + +template <> +struct GraphTraits + : GraphTraits *> {}; + +template <> +struct DOTGraphTraits< + X86LoadValueInjectionLoadHardeningPass::MachineGadgetGraph *> + : DefaultDOTGraphTraits { + using GraphType = 
X86LoadValueInjectionLoadHardeningPass::MachineGadgetGraph; + using Traits = X86LoadValueInjectionLoadHardeningPass::GTraits; + using NodeRef = typename Traits::NodeRef; + using EdgeRef = typename Traits::EdgeRef; + using ChildIteratorType = typename Traits::ChildIteratorType; + using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType; + + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(GraphType *G) { + std::string GraphName{"Speculative gadgets for \""}; + GraphName += G->getMF().getName(); + GraphName += "\" function"; + return GraphName; + } + + std::string getNodeLabel(NodeRef Node, GraphType *) { + std::string str; + raw_string_ostream str_stream{str}; + if (Node->value() == ARG_NODE) + return "ARGS"; + str_stream << *Node->value(); + return str_stream.str(); + } + + static std::string getNodeAttributes(NodeRef Node, GraphType *) { + MachineInstr *MI = Node->value(); + if (MI == ARG_NODE) + return "color = blue"; + else if (MI->getOpcode() == X86::LFENCE) + return "color = green"; + else + return ""; + } + + static std::string getEdgeAttributes(NodeRef, ChildIteratorType E, + GraphType *) { + int EdgeVal = (*E.getCurrent()).value(); + return EdgeVal >= 0 ? "label = " + std::to_string(EdgeVal) + : "color = red, style = \"dashed\""; + } +}; + +} // end namespace llvm + +char X86LoadValueInjectionLoadHardeningPass::ID = 0; + +void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage( + AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.setPreservesCFG(); +} + +bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction( + MachineFunction &MF) { + STI = &MF.getSubtarget(); + if (!STI->useLVILoadHardening() || !(STI->hasSSE2() || STI->is64Bit())) + return false; // FIXME: support 32-bit + + // Don't skip functions with the "optnone" attr but participate in opt-bisect. + const Function &F = MF.getFunction(); + if (!F.hasOptNone() && skipFunction(F)) + return false; + + LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName() + << " *****\n"); + ++NumFunctionsConsidered; + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); + LLVM_DEBUG(dbgs() << "Hardening data-dependent loads...\n"); + int FencesInserted = hardenLoads(MF, false); + LLVM_DEBUG(dbgs() << "Hardening data-dependent loads... Done\n"); + if (!NoFixedLoads) { + LLVM_DEBUG(dbgs() << "Hardening fixed loads...\n"); + hardenLoads(MF, true); + LLVM_DEBUG(dbgs() << "Hardening fixed loads... Done\n"); + } + if (FencesInserted > 0) + ++NumFunctionsMitigated; + NumFences += FencesInserted; + return (FencesInserted > 0); +} + +// Apply the mitigation to `MF`, return the number of fences inserted. +// If `FixedLoads` is `true`, then the mitigation will be applied to both fixed +// and non-fixed loads; otherwise, only non-fixed loads. +int X86LoadValueInjectionLoadHardeningPass::hardenLoads(MachineFunction &MF, + bool FixedLoads) const { + int FencesInserted = 0; + + LLVM_DEBUG(dbgs() << "Building gadget graph...\n"); + const auto &MLI = getAnalysis(); + const auto &MDT = getAnalysis(); + const auto &MDF = getAnalysis(); + std::unique_ptr Graph = + getGadgetGraph(MF, MLI, MDT, MDF, FixedLoads); + LLVM_DEBUG(dbgs() << "Building gadget graph... 
Done\n"); + if (Graph == nullptr) + return 0; // didn't find any gadgets + + if (EmitDotVerify) { + WriteGraph(outs(), Graph.get()); + return 0; + } + + if (EmitDot || EmitDotOnly) { + LLVM_DEBUG(dbgs() << "Emitting gadget graph...\n"); + std::error_code FileError; + std::string FileName = "lvi."; + if (FixedLoads) + FileName += "fixed."; + FileName += Graph->getMF().getName(); + FileName += ".dot"; + raw_fd_ostream FileOut(FileName, FileError); + if (FileError) + errs() << FileError.message(); + WriteGraph(FileOut, Graph.get()); + FileOut.close(); + LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n"); + if (EmitDotOnly) + return 0; + } + + do { + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n"); + std::unique_ptr ElimGraph = elimEdges(std::move(Graph)); + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n"); + if (ElimGraph->NumGadgets == 0) + break; + + EdgeSet CutEdges{*ElimGraph}; + LLVM_DEBUG(dbgs() << "Cutting edges...\n"); + cutEdges(*ElimGraph, CutEdges); + LLVM_DEBUG(dbgs() << "Cutting edges... Done\n"); + + LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n"); + FencesInserted += insertFences(*ElimGraph, CutEdges); + LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n"); + + Graph.reset(GraphBuilder::trim( + *ElimGraph, MachineGadgetGraph::NodeSet{*ElimGraph}, CutEdges)); + } while (true); + + return FencesInserted; +} + +std::unique_ptr +X86LoadValueInjectionLoadHardeningPass::getGadgetGraph( + MachineFunction &MF, const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT, const MachineDominanceFrontier &MDF, + bool FixedLoads) const { + using namespace rdf; + + // Build the Register Dataflow Graph using the RDF framework + TargetOperandInfo TOI{*TII}; + DataFlowGraph DFG{MF, *TII, *TRI, MDT, MDF, TOI}; + DFG.build(); + Liveness L{MF.getRegInfo(), DFG}; + L.computePhiInfo(); + + GraphBuilder Builder; + using GraphIter = typename GraphBuilder::NodeRef; + DenseMap NodeMap; + int FenceCount = 0; + auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) { + auto Ref = NodeMap.find(MI); + if (Ref == NodeMap.end()) { + auto I = Builder.addVertex(MI); + NodeMap[MI] = I; + return std::pair{I, true}; + } else { + return std::pair{Ref->getSecond(), false}; + } + }; + + // Analyze all machine instructions to find gadgets and LFENCEs, adding + // each interesting value to `Nodes` + DenseSet> GadgetEdgeSet; + auto AnalyzeDef = [&](NodeAddr Def) { + MachineInstr *MI = Def.Addr->getFlags() & NodeAttrs::PhiRef + ? ARG_NODE + : Def.Addr->getOp().getParent(); + auto AnalyzeUse = [&](NodeAddr Use) { + assert(!(Use.Addr->getFlags() & NodeAttrs::PhiRef)); + MachineOperand &UseMO = Use.Addr->getOp(); + MachineInstr &UseMI = *UseMO.getParent(); + assert(UseMO.isReg()); + // We naively assume that an instruction propagates any loaded Uses + // to all Defs, unless the instruction is a call + if (UseMI.isCall()) + return false; + if (instrUsesRegToAccessMemory(UseMI, UseMO.getReg()) || + (!NoConditionalBranches && + instrUsesRegToBranch(UseMI, UseMO.getReg()))) { // found a gadget! 
+
+std::unique_ptr<MachineGadgetGraph>
+X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
+    MachineFunction &MF, const MachineLoopInfo &MLI,
+    const MachineDominatorTree &MDT, const MachineDominanceFrontier &MDF,
+    bool FixedLoads) const {
+  using namespace rdf;
+
+  // Build the Register Dataflow Graph using the RDF framework
+  TargetOperandInfo TOI{*TII};
+  DataFlowGraph DFG{MF, *TII, *TRI, MDT, MDF, TOI};
+  DFG.build();
+  Liveness L{MF.getRegInfo(), DFG};
+  L.computePhiInfo();
+
+  GraphBuilder Builder;
+  using GraphIter = typename GraphBuilder::NodeRef;
+  DenseMap<MachineInstr *, GraphIter> NodeMap;
+  int FenceCount = 0;
+  auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) {
+    auto Ref = NodeMap.find(MI);
+    if (Ref == NodeMap.end()) {
+      auto I = Builder.addVertex(MI);
+      NodeMap[MI] = I;
+      return std::pair<GraphIter, bool>{I, true};
+    } else {
+      return std::pair<GraphIter, bool>{Ref->getSecond(), false};
+    }
+  };
+
+  // Analyze all machine instructions to find gadgets and LFENCEs, adding
+  // each interesting value to `Nodes`
+  DenseSet<std::pair<GraphIter, GraphIter>> GadgetEdgeSet;
+  auto AnalyzeDef = [&](NodeAddr<DefNode *> Def) {
+    MachineInstr *MI = Def.Addr->getFlags() & NodeAttrs::PhiRef
+                           ? ARG_NODE
+                           : Def.Addr->getOp().getParent();
+    auto AnalyzeUse = [&](NodeAddr<UseNode *> Use) {
+      assert(!(Use.Addr->getFlags() & NodeAttrs::PhiRef));
+      MachineOperand &UseMO = Use.Addr->getOp();
+      MachineInstr &UseMI = *UseMO.getParent();
+      assert(UseMO.isReg());
+      // We naively assume that an instruction propagates any loaded Uses
+      // to all Defs, unless the instruction is a call
+      if (UseMI.isCall())
+        return false;
+      if (instrUsesRegToAccessMemory(UseMI, UseMO.getReg()) ||
+          (!NoConditionalBranches &&
+           instrUsesRegToBranch(UseMI, UseMO.getReg()))) { // found a gadget!
+        // add the root of this chain
+        auto GadgetBegin = MaybeAddNode(MI);
+        // and the instruction that (transitively) discloses the root
+        auto GadgetEnd = MaybeAddNode(&UseMI);
+        if (GadgetEdgeSet.insert({GadgetBegin.first, GadgetEnd.first}).second)
+          Builder.addEdge(GADGET_EDGE, GadgetBegin.first, GadgetEnd.first);
+        if (UseMI.mayLoad()) // FIXME: This should be more precise
+          return false;      // stop traversing further uses of `Reg`
+      }
+      return true;
+    };
+    SmallSet<NodeId, 8> NodesVisited;
+    std::function<void(NodeAddr<DefNode *>)> AnalyzeDefUseChain =
+        [&](NodeAddr<DefNode *> Def) {
+          if (Def.Addr->getAttrs() & NodeAttrs::Dead)
+            return;
+          RegisterRef DefReg = DFG.getPRI().normalize(Def.Addr->getRegRef(DFG));
+          NodeList Uses;
+          for (auto UseID : L.getAllReachedUses(DefReg, Def)) {
+            auto Use = DFG.addr<UseNode *>(UseID);
+            if (Use.Addr->getFlags() & NodeAttrs::PhiRef) { // phi node
+              NodeAddr<PhiNode *> Phi = Use.Addr->getOwner(DFG);
+              for (auto I : L.getRealUses(Phi.Id)) {
+                if (DFG.getPRI().alias(RegisterRef(I.first), DefReg)) {
+                  for (auto UA : I.second) {
+                    auto PhiUse = DFG.addr<UseNode *>(UA.first);
+                    Uses.push_back(PhiUse);
+                  }
+                }
+              }
+            } else { // not a phi node
+              Uses.push_back(Use);
+            }
+          }
+          for (auto N : Uses) {
+            NodeAddr<UseNode *> Use{N};
+            if (NodesVisited.insert(Use.Id).second && AnalyzeUse(Use)) {
+              NodeAddr<InstrNode *> Owner{Use.Addr->getOwner(DFG)};
+              NodeList Defs = Owner.Addr->members_if(DataFlowGraph::IsDef, DFG);
+              std::for_each(Defs.begin(), Defs.end(), AnalyzeDefUseChain);
+            }
+          }
+        };
+    AnalyzeDefUseChain(Def);
+  };
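+
+  // A sketch of the traversal above, in pseudo-IR (names illustrative):
+  //
+  //   %a = load %untrusted_ptr   ; AnalyzeDef starts at this def
+  //   %b = add %a, 1             ; reached use; its defs are traversed too
+  //   %c = load %b               ; use that addresses memory: gadget found
+  //
+  // AnalyzeDefUseChain walks from the def of %a through %b and records a
+  // gadget edge from the load of %a to the load of %c.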
+
+  LLVM_DEBUG(dbgs() << "Analyzing def-use chains to find gadgets\n");
+  // Analyze function arguments
+  if (!FixedLoads) { // only need to analyze function args once
+    NodeAddr<BlockNode *> EntryBlock = DFG.getFunc().Addr->getEntryBlock(DFG);
+    for (NodeAddr<PhiNode *> ArgPhi :
+         EntryBlock.Addr->members_if(DataFlowGraph::IsPhi, DFG)) {
+      NodeList Defs = ArgPhi.Addr->members_if(DataFlowGraph::IsDef, DFG);
+      std::for_each(Defs.begin(), Defs.end(), AnalyzeDef);
+    }
+  }
+  // Analyze every instruction in MF
+  for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) {
+    for (NodeAddr<StmtNode *> SA :
+         BA.Addr->members_if(DataFlowGraph::IsCode, DFG)) {
+      MachineInstr *MI = SA.Addr->getCode();
+      if (isFence(MI)) {
+        MaybeAddNode(MI);
+        ++FenceCount;
+      } else if (MI->mayLoad() && ((FixedLoads && instrIsFixedAccess(*MI)) ||
+                                   (!FixedLoads && !instrIsFixedAccess(*MI)))) {
+        NodeList Defs = SA.Addr->members_if(DataFlowGraph::IsDef, DFG);
+        std::for_each(Defs.begin(), Defs.end(), AnalyzeDef);
+      }
+    }
+  }
+  int GadgetCount = static_cast<int>(GadgetEdgeSet.size());
+  LLVM_DEBUG(dbgs() << "Found " << FenceCount << " fences\n");
+  LLVM_DEBUG(dbgs() << "Found " << GadgetCount << " gadgets\n");
+  if (GadgetCount == 0)
+    return nullptr;
+  NumGadgets += GadgetCount;
+
+  // Traverse CFG to build the rest of the graph
+  SmallSet<MachineBasicBlock *, 2> BlocksVisited;
+  std::function<void(MachineBasicBlock *, GraphIter, unsigned)> TraverseCFG =
+      [&](MachineBasicBlock *MBB, GraphIter GI, unsigned ParentDepth) {
+        unsigned LoopDepth = MLI.getLoopDepth(MBB);
+        if (!MBB->empty()) {
+          // Always add the first instruction in each block
+          auto NI = MBB->begin();
+          auto BeginBB = MaybeAddNode(&*NI);
+          Builder.addEdge(ParentDepth, GI, BeginBB.first);
+          if (!BlocksVisited.insert(MBB).second)
+            return;
+
+          // Add any instructions within the block that are gadget components
+          GI = BeginBB.first;
+          while (++NI != MBB->end()) {
+            auto Ref = NodeMap.find(&*NI);
+            if (Ref != NodeMap.end()) {
+              Builder.addEdge(LoopDepth, GI, Ref->getSecond());
+              GI = Ref->getSecond();
+            }
+          }
+
+          // Always add the terminator instruction, if one exists
+          auto T = MBB->getFirstTerminator();
+          if (T != MBB->end()) {
+            auto EndBB = MaybeAddNode(&*T);
+            if (EndBB.second)
+              Builder.addEdge(LoopDepth, GI, EndBB.first);
+            GI = EndBB.first;
+          }
+        }
+        for (MachineBasicBlock *Succ : MBB->successors())
+          TraverseCFG(Succ, GI, LoopDepth);
+      };
+  // ARG_NODE is a pseudo-instruction that represents MF args in the GadgetGraph
+  GraphIter ArgNode = MaybeAddNode(ARG_NODE).first;
+  TraverseCFG(&MF.front(), ArgNode, 0);
+  std::unique_ptr<MachineGadgetGraph> G{Builder.get(FenceCount, GadgetCount)};
+  LLVM_DEBUG(dbgs() << "Found " << GTraits::size(G.get()) << " nodes\n");
+  return G;
+}
+
+std::unique_ptr<MachineGadgetGraph>
+X86LoadValueInjectionLoadHardeningPass::elimEdges(
+    std::unique_ptr<MachineGadgetGraph> Graph) const {
+  MachineGadgetGraph::NodeSet ElimNodes{*Graph};
+  MachineGadgetGraph::EdgeSet ElimEdges{*Graph};
+
+  if (Graph->NumFences > 0) { // eliminate fences
+    for (auto EI = Graph->edges_begin(), EE = Graph->edges_end(); EI != EE;
+         ++EI) {
+      GTraits::NodeRef Dest = GTraits::edge_dest(*EI);
+      if (isFence(Dest->value())) {
+        ElimNodes.insert(Dest);
+        ElimEdges.insert(EI);
+        std::for_each(
+            GTraits::child_edge_begin(Dest), GTraits::child_edge_end(Dest),
+            [&ElimEdges](GTraits::EdgeRef E) { ElimEdges.insert(&E); });
+      }
+    }
+    LLVM_DEBUG(dbgs() << "Eliminated " << ElimNodes.count()
+                      << " fence nodes\n");
+  }
+
+  // eliminate gadget edges that are mitigated
+  int NumGadgets = 0;
+  MachineGadgetGraph::NodeSet Visited{*Graph}, GadgetSinks{*Graph};
+  MachineGadgetGraph::EdgeSet ElimGadgets{*Graph};
+  for (auto NI = GTraits::nodes_begin(Graph.get()),
+            NE = GTraits::nodes_end(Graph.get());
+       NI != NE; ++NI) {
+    // collect the gadgets for this node
+    for (auto EI = GTraits::child_edge_begin(*NI),
+              EE = GTraits::child_edge_end(*NI);
+         EI != EE; ++EI) {
+      if (MachineGadgetGraph::isGadgetEdge(*EI)) {
+        ++NumGadgets;
+        ElimGadgets.insert(EI);
+        GadgetSinks.insert(GTraits::edge_dest(*EI));
+      }
+    }
+    if (GadgetSinks.empty())
+      continue;
+    std::function<void(GTraits::NodeRef, bool)> TraverseDFS =
+        [&](GTraits::NodeRef N, bool FirstNode) {
+          if (!FirstNode) {
+            Visited.insert(N);
+            if (GadgetSinks.contains(N)) {
+              for (auto CEI = GTraits::child_edge_begin(*NI),
+                        CEE = GTraits::child_edge_end(*NI);
+                   CEI != CEE; ++CEI) {
+                if (MachineGadgetGraph::isGadgetEdge(*CEI) &&
+                    GTraits::edge_dest(*CEI) == N)
+                  ElimGadgets.erase(CEI);
+              }
+            }
+          }
+          for (auto CEI = GTraits::child_edge_begin(N),
+                    CEE = GTraits::child_edge_end(N);
+               CEI != CEE; ++CEI) {
+            GTraits::NodeRef Dest = GTraits::edge_dest(*CEI);
+            if (MachineGadgetGraph::isCFGEdge(*CEI) &&
+                !Visited.contains(Dest) && !ElimEdges.contains(CEI))
+              TraverseDFS(Dest, false);
+          }
+        };
+    TraverseDFS(*NI, true);
+    Visited.clear();
+    GadgetSinks.clear();
+  }
+  LLVM_DEBUG(dbgs() << "Eliminated " << ElimGadgets.count()
+                    << " gadget edges\n");
+  ElimEdges |= ElimGadgets;
+
+  if (!(ElimEdges.empty() && ElimNodes.empty())) {
+    int NumRemainingGadgets = NumGadgets - ElimGadgets.count();
+    Graph.reset(GraphBuilder::trim(*Graph, ElimNodes, ElimEdges,
+                                   0 /* NumFences */, NumRemainingGadgets));
+  } else {
+    Graph->NumFences = 0;
+    Graph->NumGadgets = NumGadgets;
+  }
+  return Graph;
+}
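+
+// The edge-cutting step below can defer to an external solver loaded from
+// OptimizePluginPath, which must export an `optimize_cut` symbol compatible
+// with OptimizeCutT. For illustration only, a minimal, maximally conservative
+// plugin that cuts every edge might look like this (parameter order inferred
+// from the call site below; the return value is assumed and is ignored here):
+//
+//   extern "C" int optimize_cut(unsigned int *Nodes, unsigned int NodesSize,
+//                               unsigned int *Edges, int *EdgeValues,
+//                               int *EdgeCuts, unsigned int EdgesSize) {
+//     for (unsigned int I = 0; I < EdgesSize; ++I)
+//       EdgeCuts[I] = 1; // a real plugin would solve for a cheaper cut
+//     return 0;
+//   }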
+
+void X86LoadValueInjectionLoadHardeningPass::cutEdges(
+    MachineGadgetGraph &G,
+    MachineGadgetGraph::EdgeSet &CutEdges /* out */) const {
+  if (!OptimizePluginPath.empty()) {
+    if (!OptimizeDL.isValid()) {
+      std::string ErrorMsg{};
+      OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary(
+          OptimizePluginPath.c_str(), &ErrorMsg);
+      if (!ErrorMsg.empty())
+        report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"');
+      OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut");
+      if (!OptimizeCut)
+        report_fatal_error("Invalid optimization plugin");
+    }
+    auto *Nodes = new unsigned int[G.nodes_size() + 1 /* terminator node */];
+    auto *Edges = new unsigned int[G.edges_size()];
+    auto *EdgeCuts = new int[G.edges_size()];
+    auto *EdgeValues = new int[G.edges_size()];
+    for (auto *NI = G.nodes_begin(), *NE = G.nodes_end(); NI != NE; ++NI) {
+      Nodes[std::distance(G.nodes_begin(), NI)] =
+          std::distance(G.edges_begin(), GTraits::child_edge_begin(NI));
+    }
+    Nodes[G.nodes_size()] = G.edges_size(); // terminator node
+    for (auto *EI = G.edges_begin(), *EE = G.edges_end(); EI != EE; ++EI) {
+      Edges[std::distance(G.edges_begin(), EI)] =
+          std::distance(G.nodes_begin(), GTraits::edge_dest(*EI));
+      EdgeValues[std::distance(G.edges_begin(), EI)] = EI->value();
+    }
+    OptimizeCut(Nodes, G.nodes_size(), Edges, EdgeValues, EdgeCuts,
+                G.edges_size());
+    for (int I = 0; I < G.edges_size(); ++I) {
+      if (EdgeCuts[I])
+        CutEdges.set(I);
+    }
+    delete[] Nodes;
+    delete[] Edges;
+    delete[] EdgeCuts;
+    delete[] EdgeValues;
+  } else { // Use the default greedy heuristic
+    // Find the cheapest CFG edge that will eliminate a gadget (by being egress
+    // from a SOURCE node or ingress to a SINK node), and cut it.
+    MachineGadgetGraph::NodeSet GadgetSinks{G};
+    MachineGadgetGraph::Edge *CheapestSoFar = nullptr;
+    for (auto NI = GTraits::nodes_begin(&G), NE = GTraits::nodes_end(&G);
+         NI != NE; ++NI) {
+      for (auto EI = GTraits::child_edge_begin(*NI),
+                EE = GTraits::child_edge_end(*NI);
+           EI != EE; ++EI) {
+        if (MachineGadgetGraph::isGadgetEdge(*EI)) {
+          // NI is a SOURCE node. Look for a cheap egress edge
+          for (auto EEI = GTraits::child_edge_begin(*NI); EEI != EE; ++EEI) {
+            if (MachineGadgetGraph::isCFGEdge(*EEI)) {
+              if (!CheapestSoFar || EEI->value() < CheapestSoFar->value())
+                CheapestSoFar = EEI;
+            }
+          }
+          GadgetSinks.insert(GTraits::edge_dest(*EI));
+        } else { // EI is a CFG edge
+          if (GadgetSinks.contains(GTraits::edge_dest(*EI))) {
+            // The dest is a SINK node. Hence EI is an ingress edge
+            if (!CheapestSoFar || EI->value() < CheapestSoFar->value())
+              CheapestSoFar = EI;
+          }
+        }
+      }
+    }
+    assert(CheapestSoFar && "Failed to cut an edge");
+    CutEdges.insert(CheapestSoFar);
+  }
+  LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
+}
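+
+// Fence placement for a cut edge depends on the edge's source instruction,
+// as implemented below. An illustrative sketch:
+//
+//   movq (%rax), %rbx   # cut edge leaves a load: LFENCE goes after it
+//   lfence
+//   ...
+//   lfence              # cut edge leaves a branch: LFENCE goes before it,
+//   je target           # and all remaining CFG edges out of it are cut too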
+
+int X86LoadValueInjectionLoadHardeningPass::insertFences(
+    MachineGadgetGraph &G, EdgeSet &CutEdges /* in, out */) const {
+  int FencesInserted = 0, AdditionalEdgesCut = 0;
+  auto CutAllCFGEdges = [&CutEdges, &AdditionalEdgesCut](GTraits::NodeRef N) {
+    for (auto CEI = GTraits::child_edge_begin(N),
+              CEE = GTraits::child_edge_end(N);
+         CEI != CEE; ++CEI) {
+      if (MachineGadgetGraph::isCFGEdge(*CEI) && !CutEdges.contains(CEI)) {
+        CutEdges.insert(CEI);
+        ++AdditionalEdgesCut;
+      }
+    }
+  };
+  for (auto NI = GTraits::nodes_begin(&G), NE = GTraits::nodes_end(&G);
+       NI != NE; ++NI) {
+    for (auto CEI = GTraits::child_edge_begin(*NI),
+              CEE = GTraits::child_edge_end(*NI);
+         CEI != CEE; ++CEI) {
+      if (CutEdges.contains(CEI)) {
+        MachineInstr *MI = (*NI)->value(), *Prev;
+        MachineBasicBlock *MBB;
+        MachineBasicBlock::iterator InsertionPt;
+        if (MI == ARG_NODE) { // insert LFENCE at beginning of entry block
+          MBB = &G.getMF().front();
+          InsertionPt = MBB->begin();
+          Prev = nullptr;
+        } else if (MI->isBranch()) { // insert the LFENCE before the branch
+          MBB = MI->getParent();
+          InsertionPt = MI;
+          Prev = MI->getPrevNode();
+          CutAllCFGEdges(*NI);
+        } else { // insert the LFENCE after the instruction
+          MBB = MI->getParent();
+          InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end();
+          Prev = InsertionPt == MBB->end()
+                     ? (MBB->empty() ? nullptr : &MBB->back())
+                     : InsertionPt->getPrevNode();
+        }
+        if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) &&
+            (!Prev || !isFence(Prev))) {
+          BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
+          ++FencesInserted;
+        }
+      }
+    }
+  }
+  LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
+  LLVM_DEBUG(dbgs() << "Cut an additional " << AdditionalEdgesCut
+                    << " edges during fence insertion\n");
+  return FencesInserted;
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory(
+    const MachineInstr &MI, unsigned Reg) const {
+  if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE ||
+      MI.getOpcode() == X86::SFENCE || MI.getOpcode() == X86::LFENCE)
+    return false;
+
+  // FIXME: This does not handle pseudo loading instructions like TCRETURN*
+  const MCInstrDesc &Desc = MI.getDesc();
+  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
+  if (MemRefBeginIdx < 0) {
+    LLVM_DEBUG(dbgs() << "Warning: unable to obtain memory operand for loading "
+                         "instruction:\n";
+               MI.print(dbgs()); dbgs() << '\n';);
+    return false;
+  }
+  MemRefBeginIdx += X86II::getOperandBias(Desc);
+
+  const MachineOperand &BaseMO =
+      MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
+  const MachineOperand &IndexMO =
+      MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
+  return (BaseMO.isReg() && BaseMO.getReg() != X86::NoRegister &&
+          TRI->regsOverlap(BaseMO.getReg(), Reg)) ||
+         (IndexMO.isReg() && IndexMO.getReg() != X86::NoRegister &&
+          TRI->regsOverlap(IndexMO.getReg(), Reg));
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToBranch(
+    const MachineInstr &MI, unsigned Reg) const {
+  if (!MI.isConditionalBranch())
+    return false;
+  for (const MachineOperand &Use : MI.uses())
+    if (Use.isReg() && Use.getReg() == Reg)
+      return true;
+  return false;
+}
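+
+// Note: the X86::Addr* offsets used above and below index into the operand
+// 5-tuple that encodes an x86 memory reference. For example, in the
+// (illustrative) instruction
+//
+//   movq 16(%rdi,%rcx,8), %rax
+//
+// the tuple is AddrBaseReg = %rdi, AddrScaleAmt = 8, AddrIndexReg = %rcx,
+// AddrDisp = 16, plus a segment register operand.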
+
+template <unsigned K>
+bool X86LoadValueInjectionLoadHardeningPass::hasLoadFrom(
+    const MachineInstr &MI) const {
+  for (auto &MMO : MI.memoperands()) {
+    const PseudoSourceValue *PSV = MMO->getPseudoValue();
+    if (PSV && PSV->kind() == K && MMO->isLoad())
+      return true;
+  }
+  return false;
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::instrAccessesStackSlot(
+    const MachineInstr &MI) const {
+  // Check the PSV first
+  if (hasLoadFrom<PseudoSourceValue::FixedStack>(MI))
+    return true;
+  // Some loads are not marked with a PSV, so we always need to double check
+  const MCInstrDesc &Desc = MI.getDesc();
+  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
+  if (MemRefBeginIdx < 0)
+    return false;
+  MemRefBeginIdx += X86II::getOperandBias(Desc);
+  return MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg).isFI() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrScaleAmt).isImm() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg).isReg() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrDisp).isImm() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrScaleAmt).getImm() == 1 &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg).getReg() ==
+             X86::NoRegister &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrDisp).getImm() == 0;
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::instrAccessesConstantPool(
+    const MachineInstr &MI) const {
+  if (hasLoadFrom<PseudoSourceValue::ConstantPool>(MI))
+    return true;
+  const MCInstrDesc &Desc = MI.getDesc();
+  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
+  if (MemRefBeginIdx < 0)
+    return false;
+  MemRefBeginIdx += X86II::getOperandBias(Desc);
+  return MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg).isReg() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrScaleAmt).isImm() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg).isReg() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrDisp).isCPI() &&
+         (MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg).getReg() ==
+              X86::RIP ||
+          MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg).getReg() ==
+              X86::NoRegister) &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrScaleAmt).getImm() == 1 &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg).getReg() ==
+             X86::NoRegister;
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::instrAccessesGOT(
+    const MachineInstr &MI) const {
+  if (hasLoadFrom<PseudoSourceValue::GOT>(MI))
+    return true;
+  const MCInstrDesc &Desc = MI.getDesc();
+  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
+  if (MemRefBeginIdx < 0)
+    return false;
+  MemRefBeginIdx += X86II::getOperandBias(Desc);
+  return MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg).isReg() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrScaleAmt).isImm() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg).isReg() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrDisp).getTargetFlags() ==
+             X86II::MO_GOTPCREL &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg).getReg() ==
+             X86::RIP &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrScaleAmt).getImm() == 1 &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg).getReg() ==
+             X86::NoRegister;
+}
+
+INITIALIZE_PASS_BEGIN(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
+                      "X86 LVI load hardening", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
+INITIALIZE_PASS_END(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
+                    "X86 LVI load hardening", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() {
+  return new X86LoadValueInjectionLoadHardeningPass();
+}
Index: llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
@@ -0,0 +1,141 @@
+//===-- X86LoadValueInjectionRetHardening.cpp - LVI RET hardening for x86 --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: Replaces every `ret` instruction with the sequence:
+/// ```
+/// pop <scratch-reg>
+/// lfence
+/// jmp *<scratch-reg>
+/// ```
+/// where `<scratch-reg>` is some available scratch register, according to the
+/// calling convention of the function being mitigated.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include <bitset>
+
+using namespace llvm;
+
+#define PASS_KEY "x86-lvi-ret"
+#define DEBUG_TYPE PASS_KEY
+
+STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
+STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
+STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
+                                 "were deployed");
+
+namespace {
+
+class X86LoadValueInjectionRetHardeningPass : public MachineFunctionPass {
+public:
+  X86LoadValueInjectionRetHardeningPass() : MachineFunctionPass(ID) {}
+  StringRef getPassName() const override {
+    return "X86 Load Value Injection (LVI) Ret-Hardening Pass";
+  }
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  static char ID;
+};
+
+} // end anonymous namespace
+
+char X86LoadValueInjectionRetHardeningPass::ID = 0;
+
+bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction(
+    MachineFunction &MF) {
+  const X86Subtarget *Subtarget = &MF.getSubtarget<X86Subtarget>();
+  if (!Subtarget->useLVIControlFlowIntegrity() ||
+      !(Subtarget->hasSSE2() || Subtarget->is64Bit()))
+    return false; // FIXME: support 32-bit
+
+  // Don't skip functions with the "optnone" attribute, but do participate in
+  // opt-bisect.
+  const Function &F = MF.getFunction();
+  if (!F.hasOptNone() && skipFunction(F))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+                    << " *****\n");
+  ++NumFunctionsConsidered;
+  const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const X86InstrInfo *TII = Subtarget->getInstrInfo();
+  unsigned ClobberReg = X86::NoRegister;
+  std::bitset<X86::NUM_TARGET_REGS> UnclobberableGR64s;
+  UnclobberableGR64s.set(X86::RSP); // can't clobber stack pointer
+  UnclobberableGR64s.set(X86::RIP); // can't clobber instruction pointer
+  UnclobberableGR64s.set(X86::RAX); // used for function return
+  UnclobberableGR64s.set(X86::RDX); // used for function return
+
+  // We can clobber any register allowed by the function's calling convention.
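+  // For example, under the SysV x86-64 convention this typically selects a
+  // caller-saved register such as RCX, while for preserve_mostcc and
+  // preserve_allcc functions nearly every GR64 is callee-saved and R11 is
+  // chosen (these choices are illustrative; the loop below simply takes the
+  // first GR64 not marked unclobberable).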
+  for (const MCPhysReg *PR = TRI->getCalleeSavedRegs(&MF); auto Reg = *PR; ++PR)
+    UnclobberableGR64s.set(Reg);
+  for (auto &Reg : X86::GR64RegClass) {
+    if (!UnclobberableGR64s.test(Reg)) {
+      ClobberReg = Reg;
+      break;
+    }
+  }
+
+  if (ClobberReg != X86::NoRegister) {
+    LLVM_DEBUG(dbgs() << "Selected register "
+                      << Subtarget->getRegisterInfo()->getRegAsmName(ClobberReg)
+                      << " to clobber\n");
+  } else {
+    LLVM_DEBUG(dbgs() << "Could not find a register to clobber\n");
+  }
+
+  bool Modified = false;
+  for (auto &MBB : MF) {
+    if (MBB.empty()) // guard against blocks with no instructions
+      continue;
+    MachineInstr &MI = MBB.back();
+    if (MI.getOpcode() != X86::RETQ)
+      continue;
+
+    if (ClobberReg != X86::NoRegister) {
+      MBB.erase_instr(&MI);
+      BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::POP64r))
+          .addReg(ClobberReg, RegState::Define)
+          .setMIFlag(MachineInstr::FrameDestroy);
+      BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::LFENCE));
+      BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::JMP64r))
+          .addReg(ClobberReg);
+    } else {
+      // In case there is no available scratch register, we can still read
+      // from RSP to assert that RSP points to a valid page. The write to RSP
+      // is also helpful because it verifies that the stack's write
+      // permissions are intact.
+      MachineInstr *Fence = BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE));
+      addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)),
+                   X86::RSP, false, 0)
+          .addImm(0)
+          ->addRegisterDead(X86::EFLAGS, TRI);
+    }
+
+    ++NumFences;
+    Modified = true;
+  }
+
+  if (Modified)
+    ++NumFunctionsMitigated;
+  return Modified;
+}
+
+INITIALIZE_PASS(X86LoadValueInjectionRetHardeningPass, PASS_KEY,
+                "X86 LVI ret hardener", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionRetHardeningPass() {
+  return new X86LoadValueInjectionRetHardeningPass();
+}
Index: llvm/lib/Target/X86/X86MCInstLower.cpp
===================================================================
--- llvm/lib/Target/X86/X86MCInstLower.cpp
+++ llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1223,7 +1223,8 @@
     break;
   case MachineOperand::MO_Register:
     // FIXME: Add retpoline support and remove this.
-    if (Subtarget->useRetpolineIndirectCalls())
+    if (Subtarget->useRetpolineIndirectCalls() ||
+        Subtarget->useLVIControlFlowIntegrity())
       report_fatal_error("Lowering register statepoints with retpoline not "
                          "yet implemented.");
     CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
@@ -1402,7 +1403,8 @@
   EmitAndCountInstruction(
       MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
   // FIXME: Add retpoline support and remove this.
-  if (Subtarget->useRetpolineIndirectCalls())
+  if (Subtarget->useRetpolineIndirectCalls() ||
+      Subtarget->useLVIControlFlowIntegrity())
     report_fatal_error(
         "Lowering patchpoint with retpoline not yet implemented.");
   EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
Index: llvm/lib/Target/X86/X86Subtarget.h
===================================================================
--- llvm/lib/Target/X86/X86Subtarget.h
+++ llvm/lib/Target/X86/X86Subtarget.h
@@ -421,6 +421,16 @@
   /// than emitting one inside the compiler.
   bool UseRetpolineExternalThunk = false;
 
+  /// Prevent generation of indirect call/branch instructions from memory,
+  /// and force all indirect call/branch instructions from a register to be
+  /// preceded by an LFENCE. Also decompose RET instructions into a
+  /// POP+LFENCE+JMP sequence.
+  bool UseLVIControlFlowIntegrity = false;
+
+  /// Insert LFENCE instructions to prevent data speculatively injected into
+  /// loads from being used maliciously.
+  bool UseLVILoadHardening = false;
+
   /// Use software floating point for code generation.
   bool UseSoftFloat = false;
 
@@ -709,6 +719,8 @@
   bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
   bool preferMaskRegisters() const { return PreferMaskRegisters; }
   bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
+  bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
+  bool useLVILoadHardening() const { return UseLVILoadHardening; }
   unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
   unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
@@ -856,7 +868,7 @@
   /// If we are using retpolines, we need to expand indirectbr to avoid it
   /// lowering to an actual indirect jump.
   bool enableIndirectBrExpand() const override {
-    return useRetpolineIndirectBranches();
+    return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
   }
 
   /// Enable the MachineScheduler pass for all X86 subtargets.
Index: llvm/lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- llvm/lib/Target/X86/X86TargetMachine.cpp
+++ llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -82,6 +82,9 @@
   initializeX86SpeculativeLoadHardeningPassPass(PR);
   initializeX86FlagsCopyLoweringPassPass(PR);
   initializeX86CondBrFoldingPassPass(PR);
+  initializeX86LoadValueInjectionIndirectThunksPassPass(PR);
+  initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
+  initializeX86LoadValueInjectionRetHardeningPassPass(PR);
   initializeX86OptimizeLEAPassPass(PR);
 }
@@ -496,6 +499,7 @@
 void X86PassConfig::addPostRegAlloc() {
   addPass(createX86FloatingPointStackifierPass());
+  addPass(createX86LoadValueInjectionLoadHardeningPass());
 }
 
 void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
@@ -527,6 +531,7 @@
   const MCAsmInfo *MAI = TM->getMCAsmInfo();
 
   addPass(createX86RetpolineThunksPass());
+  addPass(createX86LoadValueInjectionIndirectThunksPass());
 
   // Insert extra int3 instructions after trailing call instructions to avoid
   // issues in the unwinder.
@@ -543,6 +548,7 @@
   // Identify valid longjmp targets for Windows Control Flow Guard.
   if (TT.isOSWindows())
     addPass(createCFGuardLongjmpPass());
+  addPass(createX86LoadValueInjectionRetHardeningPass());
 }
 
 std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
Index: llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
===================================================================
--- llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -1215,7 +1215,8 @@
     // Jump tables are only profitable if the retpoline mitigation is enabled.
     Attribute FSAttr = CS.getCaller()->getFnAttribute("target-features");
     if (FSAttr.hasAttribute(Attribute::None) ||
-        !FSAttr.getValueAsString().contains("+retpoline"))
+        !(FSAttr.getValueAsString().contains("+retpoline") ||
+          FSAttr.getValueAsString().contains("+lvi-cfi")))
       continue;
 
     if (RemarksEnabled)
Index: llvm/test/CodeGen/X86/O0-pipeline.ll
===================================================================
--- llvm/test/CodeGen/X86/O0-pipeline.ll
+++ llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -55,6 +55,10 @@
 ; CHECK-NEXT: Fast Register Allocator
 ; CHECK-NEXT: Bundle Machine CFG Edges
 ; CHECK-NEXT: X86 FP Stackifier
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Machine Natural Loop Construction
+; CHECK-NEXT: Machine Dominance Frontier Construction
+; CHECK-NEXT: X86 Load Value Injection (LVI) Load Hardening Pass
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine Optimization Remark Emitter
 ; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
@@ -73,7 +77,9 @@
 ; CHECK-NEXT: StackMap Liveness Analysis
 ; CHECK-NEXT: Live DEBUG_VALUE analysis
 ; CHECK-NEXT: X86 Retpoline Thunks
+; CHECK-NEXT: X86 Load Value Injection (LVI) Indirect Thunks Pass
 ; CHECK-NEXT: Check CFA info and insert CFI instructions if needed
+; CHECK-NEXT: X86 Load Value Injection (LVI) Ret-Hardening Pass
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine Optimization Remark Emitter
 ; CHECK-NEXT: X86 Assembly Printer
Index: llvm/test/CodeGen/X86/O3-pipeline.ll
===================================================================
--- llvm/test/CodeGen/X86/O3-pipeline.ll
+++ llvm/test/CodeGen/X86/O3-pipeline.ll
@@ -138,9 +138,11 @@
 ; CHECK-NEXT: Machine Loop Invariant Code Motion
 ; CHECK-NEXT: Bundle Machine CFG Edges
 ; CHECK-NEXT: X86 FP Stackifier
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Machine Dominance Frontier Construction
+; CHECK-NEXT: X86 Load Value Injection (LVI) Load Hardening Pass
 ; CHECK-NEXT: PostRA Machine Sink
 ; CHECK-NEXT: Machine Block Frequency Analysis
-; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: MachinePostDominator Tree Construction
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine Optimization Remark Emitter
@@ -182,7 +184,9 @@
 ; CHECK-NEXT: StackMap Liveness Analysis
 ; CHECK-NEXT: Live DEBUG_VALUE analysis
 ; CHECK-NEXT: X86 Retpoline Thunks
+; CHECK-NEXT: X86 Load Value Injection (LVI) Indirect Thunks Pass
 ; CHECK-NEXT: Check CFA info and insert CFI instructions if needed
+; CHECK-NEXT: X86 Load Value Injection (LVI) Ret-Hardening Pass
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine Optimization Remark Emitter
 ; CHECK-NEXT: X86 Assembly Printer
Index: llvm/test/CodeGen/X86/lvi-hardening-gadget-graph.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/lvi-hardening-gadget-graph.ll
@@ -0,0 +1,129 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -x86-lvi-load-dot-verify -o %t < %s | FileCheck %s
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @test(i32* %untrusted_user_ptr, i32* %secret, i32 %secret_size) #0 {
+entry:
+  %untrusted_user_ptr.addr = alloca i32*, align 8
+  %secret.addr = alloca i32*, align 8
+  %secret_size.addr = alloca i32, align 4
+  %ret_val = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32* %untrusted_user_ptr, i32** %untrusted_user_ptr.addr, align 8
+  store i32* %secret, i32** %secret.addr, align 8
+  store i32 %secret_size, i32* %secret_size.addr, align 4
+  store i32 0, i32* %ret_val, align 4
+  call void @llvm.x86.sse2.lfence()
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %1 = load i32, i32* %secret_size.addr, align 4
+  %cmp = icmp slt i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32, i32* %i, align 4
+  %rem = srem i32 %2, 2
+  %cmp1 = icmp eq i32 %rem, 0
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:                                          ; preds = %for.body
+  %3 = load i32*, i32** %secret.addr, align 8
+  %4 = load i32, i32* %ret_val, align 4
+  %idxprom = sext i32 %4 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %3, i64 %idxprom
+  %5 = load i32, i32* %arrayidx, align 4
+  %6 = load i32*, i32** %untrusted_user_ptr.addr, align 8
+  store i32 %5, i32* %6, align 4
+  br label %if.end
+
+if.else:                                          ; preds = %for.body
+  %7 = load i32*, i32** %secret.addr, align 8
+  %8 = load i32, i32* %ret_val, align 4
+  %idxprom2 = sext i32 %8 to i64
+  %arrayidx3 = getelementptr inbounds i32, i32* %7, i64 %idxprom2
+  store i32 42, i32* %arrayidx3, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %9 = load i32*, i32** %untrusted_user_ptr.addr, align 8
+  %10 = load i32, i32* %9, align 4
+  store i32 %10, i32* %ret_val, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %11 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %11, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %12 = load i32, i32* %ret_val, align 4
+  ret i32 %12
+}
+
+; CHECK: digraph "Speculative gadgets for \"test\" function" {
+; CHECK-NEXT: label="Speculative gadgets for \"test\" function";
+; CHECK: Node0x{{[0-9a-f]+}} [shape=record,color = green,label="{LFENCE\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 0];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm %stack.4.i, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.i)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{JCC_1 %bb.6, 13, implicit killed $eflags\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{CMP32rm killed renamable $eax, %stack.2.secret_size.addr, 1, $noreg, 0, $noreg, implicit-def $eflags :: (dereferenceable load 4 from %ir.secret_size.addr)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm %stack.4.i, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.i)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{JCC_1 %bb.4, 5, implicit killed $eflags\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rax = MOV64rm %stack.1.secret.addr, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.secret.addr)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm killed renamable $rax, 4, killed renamable $rcx, 0, $noreg :: (load 4 from %ir.arrayidx)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rcx = MOVSX64rm32 %stack.3.ret_val, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.ret_val)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rcx = MOV64rm %stack.0.untrusted_user_ptr.addr, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.untrusted_user_ptr.addr)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{MOV32mr killed renamable $rcx, 1, $noreg, 0, $noreg, killed renamable $eax :: (store 4 into %ir.6)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rax = MOV64rm %stack.1.secret.addr, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.secret.addr)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{MOV32mi killed renamable $rax, 4, killed renamable $rcx, 0, $noreg, 42 :: (store 4 into %ir.arrayidx3)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rcx = MOVSX64rm32 %stack.3.ret_val, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.ret_val)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rax = MOV64rm %stack.0.untrusted_user_ptr.addr, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.untrusted_user_ptr.addr)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm killed renamable $rax, 1, $noreg, 0, $noreg :: (load 4 from %ir.9)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,color = blue,label="{ARGS}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 0];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{MOV64mr %stack.0.untrusted_user_ptr.addr, 1, $noreg, 0, $noreg, killed renamable $rdi :: (store 8 into %ir.untrusted_user_ptr.addr)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 0];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{JMP_1 %bb.5\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{JMP_1 %bb.1\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm %stack.3.ret_val, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.ret_val)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 0];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{RET 0, $eax\n}"];
+; CHECK-NEXT: }
+
+; Function Attrs: nounwind
+declare void @llvm.x86.sse2.lfence() #1
+
+attributes #0 = { "target-features"="+lvi-cfi,+lvi-load-hardening" }
+attributes #1 = { nounwind }
Index: llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
@@ -0,0 +1,282 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --check-prefix=X64FAST
+;
+; Note that a lot of this code was lifted from retpoline.ll.
+
+declare void @bar(i32)
+
+; Test a simple indirect call and tail call.
+define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
+entry:
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_reg:
+; X64-DAG: movq %rdi, %[[fp:[^ ]*]]
+; X64-DAG: movl %esi, %[[x:[^ ]*]]
+; X64: movl %esi, %edi
+; X64: callq bar
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq %[[fp]], %r11
+; X64: callq __x86_indirect_thunk_r11
+; X64: movl %[[x]], %edi
+; X64: callq bar
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq %[[fp]], %r11
+; X64: jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_reg:
+; X64FAST: callq bar
+; X64FAST: callq __x86_indirect_thunk_r11
+; X64FAST: callq bar
+; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL
+
+
+@global_fp = external global void (i32)*
+
+; Test an indirect call through a global variable.
+define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
+  %fp1 = load void (i32)*, void (i32)** @global_fp
+  call void %fp1(i32 %x)
+  %fp2 = load void (i32)*, void (i32)** @global_fp
+  tail call void %fp2(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_global_fp:
+; X64-DAG: movl %edi, %[[x:[^ ]*]]
+; X64-DAG: movq global_fp(%rip), %r11
+; X64: callq __x86_indirect_thunk_r11
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq global_fp(%rip), %r11
+; X64: jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_global_fp:
+; X64FAST: movq global_fp(%rip), %r11
+; X64FAST: callq __x86_indirect_thunk_r11
+; X64FAST: movq global_fp(%rip), %r11
+; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL
+
+
+%struct.Foo = type { void (%struct.Foo*)** }
+
+; Test an indirect call through a vtable.
+define void @vcall(%struct.Foo* %obj) #0 {
+  %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
+  %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
+  %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
+  %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
+  tail call void %fp(%struct.Foo* %obj)
+  tail call void %fp(%struct.Foo* %obj)
+  ret void
+}
+
+; X64-LABEL: vcall:
+; X64: movq %rdi, %[[obj:[^ ]*]]
+; X64: movq (%rdi), %[[vptr:[^ ]*]]
+; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
+; X64: movq %[[fp]], %r11
+; X64: callq __x86_indirect_thunk_r11
+; X64-DAG: movq %[[obj]], %rdi
+; X64-DAG: movq %[[fp]], %r11
+; X64: jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: vcall:
+; X64FAST: callq __x86_indirect_thunk_r11
+; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL
+
+
+declare void @direct_callee()
+
+define void @direct_tail() #0 {
+  tail call void @direct_callee()
+  ret void
+}
+
+; X64-LABEL: direct_tail:
+; X64: jmp direct_callee # TAILCALL
+; X64FAST-LABEL: direct_tail:
+; X64FAST: jmp direct_callee # TAILCALL
+
+
+declare void @nonlazybind_callee() #1
+
+define void @nonlazybind_caller() #0 {
+  call void @nonlazybind_callee()
+  tail call void @nonlazybind_callee()
+  ret void
+}
+
+; X64-LABEL: nonlazybind_caller:
+; X64: movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
+; X64: movq %[[REG]], %r11
+; X64: callq __x86_indirect_thunk_r11
+; X64: movq %[[REG]], %r11
+; X64: jmp __x86_indirect_thunk_r11 # TAILCALL
+; X64FAST-LABEL: nonlazybind_caller:
+; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST: callq __x86_indirect_thunk_r11
+; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL
+
+
+; Check that a switch gets lowered using a jump table
+define void @switch_jumptable(i32* %ptr, i64* %sink) #0 {
+; X64-LABEL: switch_jumptable:
+; X64-NOT: jmpq *
+entry:
+  br label %header
+
+header:
+  %i = load volatile i32, i32* %ptr
+  switch i32 %i, label %bb0 [
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+    i32 5, label %bb5
+    i32 6, label %bb6
+    i32 7, label %bb7
+    i32 8, label %bb8
+    i32 9, label %bb9
+  ]
+
+bb0:
+  store volatile i64 0, i64* %sink
+  br label %header
+
+bb1:
+  store volatile i64 1, i64* %sink
+  br label %header
+
+bb2:
+  store volatile i64 2, i64* %sink
+  br label %header
+
+bb3:
+  store volatile i64 3, i64* %sink
+  br label %header
+
+bb4:
+  store volatile i64 4, i64* %sink
+  br label %header
+
+bb5:
+  store volatile i64 5, i64* %sink
+  br label %header
+
+bb6:
+  store volatile i64 6, i64* %sink
+  br label %header
+
+bb7:
+  store volatile i64 7, i64* %sink
+  br label %header
+
+bb8:
+  store volatile i64 8, i64* %sink
+  br label %header
+
+bb9:
+  store volatile i64 9, i64* %sink
+  br label %header
+}
+
+
+@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb1),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb2),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb3),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb4),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb5),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb6),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb7),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb8),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb9)]
+
+; Check that when thunks are enabled the indirectbr instruction gets
+; rewritten to use switch, and that in turn doesn't get lowered as a jump
+; table.
+define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #0 {
+; X64-LABEL: indirectbr_rewrite:
+; X64-NOT: jmpq *
+entry:
+  %i0 = load i64, i64* %p
+  %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0
+  %target0 = load i8*, i8** %target.i0
+  indirectbr i8* %target0, [label %bb1, label %bb3]
+
+bb0:
+  store volatile i64 0, i64* %sink
+  br label %latch
+
+bb1:
+  store volatile i64 1, i64* %sink
+  br label %latch
+
+bb2:
+  store volatile i64 2, i64* %sink
+  br label %latch
+
+bb3:
+  store volatile i64 3, i64* %sink
+  br label %latch
+
+bb4:
+  store volatile i64 4, i64* %sink
+  br label %latch
+
+bb5:
+  store volatile i64 5, i64* %sink
+  br label %latch
+
+bb6:
+  store volatile i64 6, i64* %sink
+  br label %latch
+
+bb7:
+  store volatile i64 7, i64* %sink
+  br label %latch
+
+bb8:
+  store volatile i64 8, i64* %sink
+  br label %latch
+
+bb9:
+  store volatile i64 9, i64* %sink
+  br label %latch
+
+latch:
+  %i.next = load i64, i64* %p
+  %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next
+  %target.next = load i8*, i8** %target.i.next
+  ; Potentially hit a full 10 successors here so that even if we rewrite as
+  ; a switch it will try to be lowered with a jump table.
+  indirectbr i8* %target.next, [label %bb0,
+                                label %bb1,
+                                label %bb2,
+                                label %bb3,
+                                label %bb4,
+                                label %bb5,
+                                label %bb6,
+                                label %bb7,
+                                label %bb8,
+                                label %bb9]
+}
+
+; Lastly check that the necessary thunks were emitted.
+;
+; X64-LABEL: .section .text.__x86_indirect_thunk_r11,{{.*}},__x86_indirect_thunk_r11,comdat
+; X64-NEXT: .hidden __x86_indirect_thunk_r11
+; X64-NEXT: .weak __x86_indirect_thunk_r11
+; X64: __x86_indirect_thunk_r11:
+; X64-NEXT: # {{.*}} # %entry
+; X64-NEXT: lfence
+; X64-NEXT: jmpq *%r11
+
+attributes #0 = { "target-features"="+lvi-cfi" }
+attributes #1 = { nonlazybind }
Index: llvm/test/CodeGen/X86/lvi-hardening-inline-asm.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/lvi-hardening-inline-asm.ll
@@ -0,0 +1,135 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -mattr=+lvi-load-hardening -mattr=+lvi-cfi < %s -o %t.out 2> %t.err
+; RUN: FileCheck %s --check-prefix=X86 < %t.out
+; RUN: FileCheck %s --check-prefix=WARN < %t.err
+
+; Test module-level assembly
+module asm "pop %rbx"
+module asm "ret"
+; WARN: warning: Instruction may be vulnerable to LVI
+; WARN-NEXT: ret
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @test_inline_asm() {
+entry:
+; X86-LABEL: test_inline_asm:
+  call void asm sideeffect "mov 0x3fed(%rip),%rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: movq 16365(%rip), %rax
+; X86-NEXT: lfence
+  call void asm sideeffect "movdqa 0x0(%rip),%xmm0", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: movdqa (%rip), %xmm0
+; X86-NEXT: lfence
+  call void asm sideeffect "movslq 0x3e5d(%rip),%rbx", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: movslq 15965(%rip), %rbx
+; X86-NEXT: lfence
+  call void asm sideeffect "mov (%r12,%rax,8),%rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: movq (%r12,%rax,8), %rax
+; X86-NEXT: lfence
+  call void asm sideeffect "movq (24)(%rsi), %r11", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: movq 24(%rsi), %r11
+; X86-NEXT: lfence
+  call void asm sideeffect "cmove %r12,%rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: cmoveq %r12, %rax
+; X86-NOT: lfence
+  call void asm sideeffect "cmove (%r12),%rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: cmoveq (%r12), %rax
+; X86-NEXT: lfence
+  call void asm sideeffect "pop %rbx", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: popq %rbx
+; X86-NEXT: lfence
+  call void asm sideeffect "popq %rbx", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: popq %rbx
+; X86-NEXT: lfence
+  call void asm sideeffect "xchg (%r12),%rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: xchgq %rax, (%r12)
+; X86-NEXT: lfence
+  call void asm sideeffect "cmpxchg %r12,(%rax)", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: cmpxchgq %r12, (%rax)
+; X86-NEXT: lfence
+  call void asm sideeffect "vpxor (%rcx,%rdx,1),%ymm1,%ymm0", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: vpxor (%rcx,%rdx), %ymm1, %ymm0
+; X86-NEXT: lfence
+  call void asm sideeffect "vpmuludq 0x20(%rsi),%ymm0,%ymm12", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: vpmuludq 32(%rsi), %ymm0, %ymm12
+; X86-NEXT: lfence
+  call void asm sideeffect "vpexpandq 0x40(%rdi),%zmm8{%k2}{z}", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: vpexpandq 64(%rdi), %zmm8 {%k2} {z}
+; X86-NEXT: lfence
+  call void asm sideeffect "addq (%r12),%rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: addq (%r12), %rax
+; X86-NEXT: lfence
+  call void asm sideeffect "subq Lpoly+0(%rip), %rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: subq Lpoly+0(%rip), %rax
+; X86-NEXT: lfence
+  call void asm sideeffect "adcq %r12,(%rax)", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: adcq %r12, (%rax)
+; X86-NEXT: lfence
+  call void asm sideeffect "negq (%rax)", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: negq (%rax)
+; X86-NEXT: lfence
+  call void asm sideeffect "incq %rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: incq %rax
+; X86-NOT: lfence
+  call void asm sideeffect "mulq (%rax)", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: mulq (%rax)
+; X86-NEXT: lfence
+  call void asm sideeffect "imulq (%rax),%rdx", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: imulq (%rax), %rdx
+; X86-NEXT: lfence
+  call void asm sideeffect "shlq $$1,(%rax)", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: shlq (%rax)
+; X86-NEXT: lfence
+  call void asm sideeffect "shrq $$1,(%rax)", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: shrq (%rax)
+; X86-NEXT: lfence
+  call void asm sideeffect "repz cmpsb %es:(%rdi),%ds:(%rsi)", "~{dirflag},~{fpsr},~{flags}"() #1
+; WARN: warning: Instruction may be vulnerable to LVI
+; WARN-NEXT: repz cmpsb %es:(%rdi),%ds:(%rsi)
+; X86: rep cmpsb %es:(%rdi), %ds:(%rsi)
+; X86-NOT: lfence
+  call void asm sideeffect "repnz scasb", "~{dirflag},~{fpsr},~{flags}"() #1
+; WARN: warning: Instruction may be vulnerable to LVI
+; WARN-NEXT: repnz scasb
+; X86: repne scasb %es:(%rdi), %al
+; X86-NOT: lfence
+  call void asm sideeffect "pinsrw $$0x6,(%eax),%xmm0", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: pinsrw $6, (%eax), %xmm0
+; X86-NEXT: lfence
+  call void asm sideeffect "ret", "~{dirflag},~{fpsr},~{flags}"() #1
+; WARN: warning: Instruction may be vulnerable to LVI
+; WARN-NEXT: ret
+; X86: retq
+; X86-NOT: lfence
+  call void asm sideeffect "ret $$8", "~{dirflag},~{fpsr},~{flags}"() #1
+; WARN: warning: Instruction may be vulnerable to LVI
+; WARN-NEXT: ret $8
+; X86: retq $8
+; X86-NOT: lfence
+  call void asm sideeffect "jmpq *(%rdx)", "~{dirflag},~{fpsr},~{flags}"() #1
+; WARN: warning: Instruction may be vulnerable to LVI
+; WARN-NEXT: jmpq *(%rdx)
+; X86: jmpq *(%rdx)
+; X86-NOT: lfence
+  call void asm sideeffect "jmpq *0x100(%rdx)", "~{dirflag},~{fpsr},~{flags}"() #1
+; WARN: warning: Instruction may be vulnerable to LVI
+; WARN-NEXT: jmpq *0x100(%rdx)
+; X86: jmpq *256(%rdx)
+; X86-NOT: lfence
"~{dirflag},~{fpsr},~{flags}"() #1 +; WARN: warning: Instruction may be vulnerable to LVI +; WARN-NEXT: callq *200(%rdx) +; X86: callq *200(%rdx) +; X86-NOT: lfence + call void asm sideeffect "fldt 0x8(%rbp)", "~{dirflag},~{fpsr},~{flags}"() #1 +; X86: fldt 8(%rbp) +; X86-NEXT: lfence + call void asm sideeffect "fld %st(0)", "~{dirflag},~{fpsr},~{flags}"() #1 +; X86: fld %st(0) +; X86-NOT: lfence +; Test assembler macros + call void asm sideeffect ".macro mplus1 x\0Aincq (\5Cx)\0A.endm\0Amplus1 %rcx", "~{dirflag},~{fpsr},~{flags}"() #1 +; X86: incq (%rcx) +; X86-NEXT: lfence + ret void +} + +attributes #1 = { nounwind } Index: llvm/test/CodeGen/X86/lvi-hardening-loads.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/lvi-hardening-loads.ll @@ -0,0 +1,102 @@ +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-CBFX +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-fixed < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-CB +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-cbranch < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-FX +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-fixed --x86-lvi-load-no-cbranch < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-BASE + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local i32 @test(i32** %secret, i32 %secret_size) #0 { +; X64-LABEL: test: +entry: + %secret.addr = alloca i32**, align 8 + %secret_size.addr = alloca i32, align 4 + %ret_val = alloca i32, align 4 + %i = alloca i32, align 4 + store i32** %secret, i32*** %secret.addr, align 8 + store i32 %secret_size, i32* %secret_size.addr, align 4 + store i32 0, i32* %ret_val, align 4 + call void @llvm.x86.sse2.lfence() + store i32 0, i32* %i, align 4 + br label %for.cond + +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: lfence +; X64-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: jmp .LBB0_1 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %1 = load i32, i32* %secret_size.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end + +; X64: .LBB0_1: # %for.cond +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64-CBFX-NEXT: lfence +; X64-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax +; X64-CBFX-NEXT: lfence +; X64-NEXT: jge .LBB0_5 + +for.body: ; preds = %for.cond + %2 = load i32, i32* %i, align 4 + %rem = srem i32 %2, 2 + %cmp1 = icmp eq i32 %rem, 0 + br i1 %cmp1, label %if.then, label %if.end + +; X64: # %bb.2: # %for.body +; X64-NEXT: # in Loop: Header=BB0_1 Depth=1 +; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64-CBFX-NEXT: lfence +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: shrl $31, %ecx +; X64-NEXT: addl %eax, %ecx +; X64-NEXT: andl $-2, %ecx +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: jne .LBB0_4 + +if.then: ; preds = %for.body + %3 = load i32**, i32*** %secret.addr, align 8 + %4 = load i32, i32* %ret_val, align 4 + %idxprom = sext i32 %4 to i64 + %arrayidx = getelementptr inbounds i32*, i32** %3, i64 %idxprom + %5 = load i32*, i32** %arrayidx, align 8 + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %ret_val, align 4 + br label %if.end + +; X64: # %bb.3: # %if.then +; X64-NEXT: # in Loop: Header=BB0_1 Depth=1 +; X64-NEXT: movq -{{[0-9]+}}(%rsp), 
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X64-CBFX-NEXT: lfence
+; X64-FX-NEXT: lfence
+; X64-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx
+; X64-CBFX-NEXT: lfence
+; X64-FX-NEXT: lfence
+; X64-NEXT: movq (%rax,%rcx,8), %rax
+; X64-NEXT: lfence
+; X64-NEXT: movl (%rax), %eax
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: jmp .LBB0_4
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %7 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32, i32* %ret_val, align 4
+  ret i32 %8
+}
+
+; Function Attrs: nounwind
+declare void @llvm.x86.sse2.lfence() #1
+
+attributes #0 = { "target-features"="+lvi-load-hardening" }
+attributes #1 = { nounwind }
Index: llvm/test/CodeGen/X86/lvi-hardening-ret.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/lvi-hardening-ret.ll
@@ -0,0 +1,72 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s
+
+define dso_local void @one_instruction() #0 {
+; CHECK-LABEL: one_instruction:
+entry:
+  ret void
+; CHECK-NOT: retq
+; CHECK: popq %[[x:[^ ]*]]
+; CHECK-NEXT: lfence
+; CHECK-NEXT: jmpq *%[[x]]
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @ordinary_function(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: ordinary_function:
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4
+  %1 = load i32, i32* %y.addr, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+; CHECK-NOT: retq
+; CHECK: popq %[[x:[^ ]*]]
+; CHECK-NEXT: lfence
+; CHECK-NEXT: jmpq *%[[x]]
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @no_caller_saved_registers_function(i32 %x, i32 %y) #1 {
+; CHECK-LABEL: no_caller_saved_registers_function:
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4
+  %1 = load i32, i32* %y.addr, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+; CHECK-NOT: retq
+; CHECK: shlq $0, (%{{[^ ]*}})
+; CHECK-NEXT: lfence
+; CHECK-NEXT: retq
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local preserve_mostcc void @preserve_most() #0 {
+; CHECK-LABEL: preserve_most:
+entry:
+  ret void
+; CHECK-NOT: retq
+; CHECK: popq %r11
+; CHECK-NEXT: lfence
+; CHECK-NEXT: jmpq *%r11
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local preserve_allcc void @preserve_all() #0 {
+; CHECK-LABEL: preserve_all:
+entry:
+  ret void
+; CHECK-NOT: retq
+; CHECK: popq %r11
+; CHECK-NEXT: lfence
+; CHECK-NEXT: jmpq *%r11
+}
+
+attributes #0 = { "target-features"="+lvi-cfi" }
+attributes #1 = { "no_caller_saved_registers" "target-features"="+lvi-cfi" }
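+
+; Note: as in the other LVI tests above, the mitigations are gated on the
+; per-function "target-features" attribute: only functions carrying +lvi-cfi
+; (control-flow and ret hardening) or +lvi-load-hardening (load hardening)
+; are rewritten; functions without these features are left untouched.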