Index: llvm/lib/Target/X86/X86InstrFoldTables.cpp
===================================================================
--- llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -340,6 +340,8 @@
   { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD },
   { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD },
   { X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD },
+  { X86::TCRETURNri, X86::TCRETURNmi, TB_FOLDED_LOAD },
+  { X86::TCRETURNri64, X86::TCRETURNmi64, TB_FOLDED_LOAD },
   { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD },
   { X86::TEST16rr, X86::TEST16mr, TB_FOLDED_LOAD },
   { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD },
Index: llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
===================================================================
--- llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -23,6 +23,7 @@
 #include "X86.h"
 #include "X86InstrBuilder.h"
 #include "X86InstrInfo.h"
+#include "X86InstrFoldTables.h"
 #include "X86Subtarget.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -70,6 +71,8 @@
           "Number of address mode used registers hardaned");
 STATISTIC(NumPostLoadRegsHardened,
           "Number of post-load register values hardened");
+STATISTIC(NumCallsOrJumpsHardened,
+          "Number of calls or jumps requiring extra hardening");
 STATISTIC(NumInstsInserted, "Number of instructions inserted");
 STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
 
@@ -105,6 +108,13 @@
              "significant security is provided."),
     cl::init(true), cl::Hidden);
 
+static cl::opt<bool> HardenIndirectCallsAndJumps(
+    PASS_KEY "-indirect",
+    cl::desc("Harden indirect calls and jumps against using speculatively "
+             "stored attacker controlled addresses. This is designed to "
+             "mitigate Spectre v1.2 style attacks."),
+    cl::init(true), cl::Hidden);
+
 namespace llvm {
 
 void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
@@ -168,6 +178,8 @@
   SmallVector<MachineInstr *, 16>
   tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);
 
+  void unfoldCallAndJumpLoads(MachineFunction &MF);
+
   void hardenAllLoads(MachineFunction &MF);
 
   unsigned saveEFLAGS(MachineBasicBlock &MBB,
@@ -193,6 +205,9 @@
   bool canHardenRegister(unsigned Reg);
   void hardenPostLoad(MachineInstr &MI);
   void hardenReturnInstr(MachineInstr &MI);
+  void hardenIndirectCallOrJumpInstr(
+      MachineInstr &MI,
+      SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
 };
 
 } // end anonymous namespace
@@ -504,6 +519,11 @@
     }
   }
 
+  // If we are going to harden calls and jumps we need to unfold their memory
+  // operands.
+  if (HardenIndirectCallsAndJumps)
+    unfoldCallAndJumpLoads(MF);
+
   // Now harden all of the loads in the function using the predicate state.
   hardenAllLoads(MF);
 
@@ -814,6 +834,112 @@
   return CMovs;
 }
 
+/// Compute the register class for the unfolded load.
+///
+/// FIXME: This should probably live in X86InstrInfo, potentially by adding
+/// a way to unfold into a newly created vreg rather than requiring a register
+/// input.
+static const TargetRegisterClass *
+getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII,
+                           unsigned Opcode) {
+  const X86MemoryFoldTableEntry *I = lookupUnfoldTable(Opcode);
+  if (I == nullptr)
+    return nullptr;
+  unsigned UnfoldedOpc = I->DstOp;
+  unsigned Index = I->Flags & TB_INDEX_MASK;
+  const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
+  return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF);
+}
+
+void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
+    MachineFunction &MF) {
+  for (MachineBasicBlock &MBB : MF)
+    for (auto MII = MBB.instr_begin(), MIE = MBB.instr_end(); MII != MIE;) {
+      // Grab a reference and increment the iterator so we can remove this
+      // instruction if needed without disturbing the iteration.
+      MachineInstr &MI = *MII++;
+
+      // Must either be a call or a branch.
+      if (!MI.isCall() && !MI.isBranch())
+        continue;
+      // We only care about loading variants of these instructions.
+      if (!MI.mayLoad())
+        continue;
+
+      switch (MI.getOpcode()) {
+      default: {
+        LLVM_DEBUG(
+            dbgs() << "ERROR: Found an unexpected loading branch or call "
+                      "instruction:\n";
+            MI.dump(); dbgs() << "\n");
+        report_fatal_error("Unexpected loading branch or call!");
+      }
+
+      case X86::FARCALL16m:
+      case X86::FARCALL32m:
+      case X86::FARCALL64:
+      case X86::FARJMP16m:
+      case X86::FARJMP32m:
+      case X86::FARJMP64:
+        // We cannot mitigate far jumps or calls, but we also don't expect them
+        // to be vulnerable to Spectre v1.2 style attacks.
+        continue;
+
+      case X86::CALL16m:
+      case X86::CALL16m_NT:
+      case X86::CALL32m:
+      case X86::CALL32m_NT:
+      case X86::CALL64m:
+      case X86::CALL64m_NT:
+      case X86::JMP16m:
+      case X86::JMP16m_NT:
+      case X86::JMP32m:
+      case X86::JMP32m_NT:
+      case X86::JMP64m:
+      case X86::JMP64m_NT:
+      case X86::TAILJMPm64:
+      case X86::TAILJMPm64_REX:
+      case X86::TAILJMPm:
+      case X86::TCRETURNmi64:
+      case X86::TCRETURNmi: {
+        // Use the generic unfold logic now that we know we're dealing with
+        // expected instructions.
+        // FIXME: We don't have test coverage for all of these!
+        auto *UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode());
+        if (!UnfoldedRC) {
+          LLVM_DEBUG(dbgs()
+                         << "ERROR: Unable to unfold load from instruction:\n";
+                     MI.dump(); dbgs() << "\n");
+          report_fatal_error("Unable to unfold load!");
+        }
+        unsigned Reg = MRI->createVirtualRegister(UnfoldedRC);
+        SmallVector<MachineInstr *, 2> NewMIs;
+        // If we were able to compute an unfolded reg class, any failure here
+        // is just a programming error so just assert.
+        bool Unfolded =
+            TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
+                                     /*UnfoldStore*/ false, NewMIs);
+        (void)Unfolded;
+        assert(Unfolded &&
+               "Computed unfolded register class but failed to unfold");
+        // Now stitch the new instructions into place and erase the old one.
+        for (auto *NewMI : NewMIs)
+          MBB.insert(MI.getIterator(), NewMI);
+        MI.eraseFromParent();
+        LLVM_DEBUG({
+          dbgs() << "Unfolded load successfully into:\n";
+          for (auto *NewMI : NewMIs) {
+            NewMI->dump();
+            dbgs() << "\n";
+          }
+        });
+        continue;
+      }
+      }
+      llvm_unreachable("Escaped switch with default!");
+    }
+}
+
 /// Returns true if the instruction has no behavior (specified or otherwise)
 /// that is based on the value of any of its register operands
 ///
@@ -1439,6 +1565,14 @@
       continue;
     }
 
+    // Check for an indirect call or branch that may need its input hardened
+    // even if we couldn't find the specific load used, or were able to avoid
+    // hardening it for some reason.
+    // Note that here we cannot break out afterward, as we may still need to
+    // handle any call aspect of this instruction.
+    if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps)
+      hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg);
+
     // After we finish processing the instruction and doing any hardening
     // necessary for it, we need to handle transferring the predicate state
     // into a call and recovering it after the call returns (if it returns).
@@ -2039,6 +2173,100 @@
   mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
 }
 
+/// An attacker may speculatively store over a value that is then speculatively
+/// loaded and used as the target of an indirect call or jump instruction. This
+/// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described
+/// in this paper:
+/// https://people.csail.mit.edu/vlk/spectre11.pdf
+///
+/// When this happens, the speculative execution of the call or jump will end
+/// up being steered to this attacker controlled address. While most such
+/// loads will be adequately hardened already, we want to ensure that they are
+/// definitively treated as needing post-load hardening. While address
+/// hardening is sufficient to prevent secret data from leaking to the
+/// attacker, it may not be sufficient to prevent an attacker from steering
+/// speculative execution. We forcibly unfolded all relevant loads above and
+/// so will always have an opportunity to post-load harden here; we just need
+/// to scan for cases not already flagged and add them.
+void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
+    MachineInstr &MI,
+    SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
+  switch (MI.getOpcode()) {
+  case X86::FARCALL16m:
+  case X86::FARCALL32m:
+  case X86::FARCALL64:
+  case X86::FARJMP16m:
+  case X86::FARJMP32m:
+  case X86::FARJMP64:
+    // We don't need to harden either far calls or far jumps as they are
+    // safe from Spectre.
+    return;
+
+  default:
+    break;
+  }
+
+  // We should never see a loading instruction at this point, as those should
+  // have been unfolded.
+  assert(!MI.mayLoad() && "Found a lingering loading instruction!");
+
+  // If the first operand isn't a register, this is a branch or call
+  // instruction with an immediate operand which doesn't need to be hardened.
+  if (!MI.getOperand(0).isReg())
+    return;
+
+  // For all of these, the target register is operand zero.
+  auto &TargetOp = MI.getOperand(0);
+  unsigned OldTargetReg = TargetOp.getReg();
+  assert(canHardenRegister(OldTargetReg) &&
+         "Cannot harden this instruction's target register!");
+
+  // If we have already hardened the target register, there is nothing to do.
+  if (AddrRegToHardenedReg.count(OldTargetReg))
+    return;
+
+  MachineBasicBlock &MBB = *MI.getParent();
+  DebugLoc Loc = MI.getDebugLoc();
+  auto InsertPt = MI.getIterator();
+
+  // FIXME: All the hardening code is essentially duplicated with the post-load
+  // hardening code. Should refactor that to be sharable here.
+  auto *TargetRC = MRI->getRegClass(OldTargetReg);
+  int TargetRegBytes = TRI->getRegSizeInBits(*TargetRC) / 8;
+
+  unsigned FlagsReg = 0;
+  if (isEFLAGSLive(MBB, InsertPt, *TRI))
+    FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
+
+  unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
+  // FIXME: Need to teach this about 32-bit mode.
+  if (TargetRegBytes != 8) {
+    unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
+    unsigned SubRegImm = SubRegImms[Log2_32(TargetRegBytes)];
+    unsigned NarrowStateReg = MRI->createVirtualRegister(TargetRC);
+    BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
+        .addReg(StateReg, 0, SubRegImm);
+    StateReg = NarrowStateReg;
+  }
+
+  unsigned NewTargetReg = MRI->createVirtualRegister(TargetRC);
+  unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
+  unsigned OrOpCode = OrOpCodes[Log2_32(TargetRegBytes)];
+  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewTargetReg)
+                 .addReg(StateReg)
+                 .addReg(OldTargetReg);
+  OrI->addRegisterDead(X86::EFLAGS, TRI);
+  TargetOp.setReg(NewTargetReg);
+  ++NumInstsInserted;
+  LLVM_DEBUG(dbgs() << "  Inserting or: "; OrI->dump(); dbgs() << "\n");
+
+  if (FlagsReg)
+    restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
+
+  ++NumCallsOrJumpsHardened;
+}
+
 INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
                       "X86 speculative load hardener", false, false)
 INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
Index: llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll
===================================================================
--- llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll
+++ llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll
@@ -15,15 +15,20 @@
 define i32 @test_indirect_call(i32 ()** %ptr) nounwind {
 ; X64-LABEL: test_indirect_call:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rbx
-; X64-NEXT:    movq %rsp, %rbx
-; X64-NEXT:    movq $-1, %rax
-; X64-NEXT:    sarq $63, %rbx
-; X64-NEXT:    orq %rbx, %rdi
-; X64-NEXT:    callq *(%rdi)
-; X64-NEXT:    shlq $47, %rbx
-; X64-NEXT:    orq %rbx, %rsp
-; X64-NEXT:    popq %rbx
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    movq $-1, %rcx
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    movq (%rdi), %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq *%rcx
+; X64-NEXT:    movq %rsp, %rcx
+; X64-NEXT:    sarq $63, %rcx
+; X64-NEXT:    shlq $47, %rcx
+; X64-NEXT:    orq %rcx, %rsp
+; X64-NEXT:    popq %rcx
 ; X64-NEXT:    retq
 entry:
   %fp = load i32 ()*, i32 ()** %ptr
@@ -37,9 +42,11 @@
 ; X64-NEXT:    movq %rsp, %rax
 ; X64-NEXT:    movq $-1, %rcx
 ; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    movq (%rdi), %rcx
+; X64-NEXT:    orq %rax, %rcx
 ; X64-NEXT:    shlq $47, %rax
 ; X64-NEXT:    orq %rax, %rsp
-; X64-NEXT:    jmpq *(%rdi) # TAILCALL
+; X64-NEXT:    jmpq *%rcx # TAILCALL
 entry:
   %fp = load i32 ()*, i32 ()** %ptr
   %v = tail call i32 %fp()
@@ -53,9 +60,11 @@
 ; X64-NEXT:    movq %rsp, %rax
 ; X64-NEXT:    movq $-1, %rcx
 ; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    movq {{.*}}(%rip), %rcx
+; X64-NEXT:    orq %rax, %rcx
 ; X64-NEXT:    shlq $47, %rax
 ; X64-NEXT:    orq %rax, %rsp
-; X64-NEXT:    callq *{{.*}}(%rip)
+; X64-NEXT:    callq *%rcx
 ; X64-NEXT:    movq %rsp, %rcx
 ; X64-NEXT:    sarq $63, %rcx
 ; X64-NEXT:    shlq $47, %rcx
@@ -74,9 +83,11 @@
 ; X64-NEXT:    movq %rsp, %rax
 ; X64-NEXT:    movq $-1, %rcx
 ; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    movq {{.*}}(%rip), %rcx
+; X64-NEXT:    orq %rax, %rcx
 ; X64-NEXT:    shlq $47, %rax
 ; X64-NEXT:    orq %rax, %rsp
-; X64-NEXT:    jmpq *{{.*}}(%rip) # TAILCALL
+; X64-NEXT:    jmpq *%rcx # TAILCALL
 entry:
   %fp = load i32 ()*, i32 ()** @global_fnptr
   %v = tail call i32 %fp()
@@ -89,8 +100,9 @@
 ; X64-NEXT:    movq %rsp, %rcx
 ; X64-NEXT:    movq $-1, %rax
 ; X64-NEXT:    sarq $63, %rcx
-; X64-NEXT:    orq %rcx, %rdi
-; X64-NEXT:    jmpq *(%rdi)
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    jmpq *%rax
 ; X64-NEXT:  .LBB4_1: # %bb0
 ; X64-NEXT:    movl $2, %eax
 ; X64-NEXT:    jmp .LBB4_2
@@ -130,8 +142,9 @@
 ; X64-NEXT:    movq $-1, %rax
 ; X64-NEXT:    sarq $63, %rcx
 ; X64-NEXT:    movslq %edi, %rax
+; X64-NEXT:    movq global_blockaddrs(,%rax,8), %rax
 ; X64-NEXT:    orq %rcx, %rax
-; X64-NEXT:    jmpq *global_blockaddrs(,%rax,8)
+; X64-NEXT:    jmpq *%rax
 ; X64-NEXT:  .Ltmp0: # Block address taken
 ; X64-NEXT:  .LBB5_1: # %bb0
 ; X64-NEXT:    movl $2, %eax
@@ -182,8 +195,9 @@
 ; X64-NEXT:  # %bb.1: # %entry
 ; X64-NEXT:    cmovaq %rax, %rcx
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movq .LJTI6_0(,%rax,8), %rax
 ; X64-NEXT:    orq %rcx, %rax
-; X64-NEXT:    jmpq *.LJTI6_0(,%rax,8)
+; X64-NEXT:    jmpq *%rax
 ; X64-NEXT:  .LBB6_3: # %bb1
 ; X64-NEXT:    movl $7, %eax
 ; X64-NEXT:    jmp .LBB6_4
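
Note (not part of the patch): for readers unfamiliar with the pass, the shape of the transformation exercised by the tests above can be summarized with a minimal, self-contained C++ sketch. The names below (FnPtr, PredState, call_through_slot, call_through_slot_hardened) are invented purely for illustration; the pass itself rewrites MachineInstrs and virtual registers, and the predicate state is the all-ones/zero mask the pass already maintains.

  // Conceptual C++ sketch of the unfold-then-harden rewrite, assuming a
  // hypothetical function-pointer slot and an explicit predicate-state value.
  #include <cstdint>

  using FnPtr = int (*)();

  // Before hardening: the load and the call are one folded instruction,
  // e.g. `callq *(%rdi)`, so the loaded target is never sitting in a
  // register where it could be masked.
  int call_through_slot(FnPtr *Slot) { return (*Slot)(); }

  // After hardening: the load is unfolded into its own register, and the
  // predicate state (all-ones on a mispredicted path, zero otherwise) is
  // OR-ed into that register before the `callq *%rcx`-style register call.
  // Under misspeculation the target becomes a non-canonical address, so the
  // speculative call cannot be steered to an attacker-controlled location.
  int call_through_slot_hardened(FnPtr *Slot, uintptr_t PredState) {
    uintptr_t Target = reinterpret_cast<uintptr_t>(*Slot); // unfolded load
    Target |= PredState;                                   // hardening OR
    return reinterpret_cast<FnPtr>(Target)();              // register call
  }

This mirrors the before/after assembly in the test updates: the folded `callq *(%rdi)` / `jmpq *(%rdi)` forms become an explicit `movq` of the target followed by an `orq` with the predicate state and an indirect call or jump through the register.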