Index: llvm/trunk/lib/Target/X86/X86RetpolineThunks.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86RetpolineThunks.cpp
+++ llvm/trunk/lib/Target/X86/X86RetpolineThunks.cpp
@@ -74,7 +74,7 @@
 
   void createThunkFunction(Module &M, StringRef Name);
   void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
-  void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
+  void populateThunk(MachineFunction &MF, unsigned Reg);
 };
 
 } // end anonymous namespace
@@ -236,25 +236,33 @@
 }
 
 void X86RetpolineThunks::populateThunk(MachineFunction &MF,
-                                       Optional<unsigned> Reg) {
+                                       unsigned Reg) {
   // Set MF properties. We never use vregs...
   MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
 
+  // Grab the entry MBB and erase any other blocks. O0 codegen appears to
+  // generate two bbs for the entry block.
   MachineBasicBlock *Entry = &MF.front();
   Entry->clear();
+  while (MF.size() > 1)
+    MF.erase(std::next(MF.begin()));
 
   MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
   MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
+  MCSymbol *TargetSym = MF.getContext().createTempSymbol();
   MF.push_back(CaptureSpec);
   MF.push_back(CallTarget);
 
   const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
   const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
 
-  BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addMBB(CallTarget);
-  Entry->addSuccessor(CallTarget);
+  Entry->addLiveIn(Reg);
+  BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
+
+  // The MIR verifier thinks that the CALL in the entry block will fall through
+  // to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is
+  // the successor, but the MIR verifier doesn't know how to cope with that.
   Entry->addSuccessor(CaptureSpec);
-  CallTarget->setHasAddressTaken();
 
   // In the capture loop for speculation, we want to stop the processor from
   // speculating as fast as possible. On Intel processors, the PAUSE instruction
@@ -270,7 +278,10 @@
   CaptureSpec->setHasAddressTaken();
   CaptureSpec->addSuccessor(CaptureSpec);
 
+  CallTarget->addLiveIn(Reg);
+  CallTarget->setHasAddressTaken();
   CallTarget->setAlignment(4);
-  insertRegReturnAddrClobber(*CallTarget, *Reg);
+  insertRegReturnAddrClobber(*CallTarget, Reg);
+  CallTarget->back().setPreInstrSymbol(MF, TargetSym);
   BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
 }
Index: llvm/trunk/test/CodeGen/X86/retpoline-external.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/retpoline-external.ll
+++ llvm/trunk/test/CodeGen/X86/retpoline-external.ll
@@ -1,8 +1,8 @@
-; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
-; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
 
-; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
-; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
+; RUN: llc -verify-machineinstrs -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
+; RUN: llc -verify-machineinstrs -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
 
 declare void @bar(i32)
 
Index: llvm/trunk/test/CodeGen/X86/retpoline-regparm.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/retpoline-regparm.ll
+++ llvm/trunk/test/CodeGen/X86/retpoline-regparm.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s
+; RUN: llc -verify-machineinstrs -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s
 
 ; Test 32-bit retpoline when -mregparm=3 is used. This case is interesting
 ; because there are no available scratch registers.  The Linux kernel builds
Index: llvm/trunk/test/CodeGen/X86/retpoline.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/retpoline.ll
+++ llvm/trunk/test/CodeGen/X86/retpoline.ll
@@ -1,8 +1,8 @@
-; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
-; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
 
-; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
-; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
+; RUN: llc -verify-machineinstrs -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
+; RUN: llc -verify-machineinstrs -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
 
 declare void @bar(i32)
 
@@ -428,8 +428,9 @@
 ; X64-NEXT:          lfence
 ; X64-NEXT:          jmp     [[CAPTURE_SPEC]]
 ; X64-NEXT:          .p2align        4, 0x90
-; X64-NEXT:  [[CALL_TARGET]]:                        # Block address taken
+; X64-NEXT:  {{.*}}                                  # Block address taken
 ; X64-NEXT:                                          # %entry
+; X64-NEXT:  [[CALL_TARGET]]:
 ; X64-NEXT:          movq    %r11, (%rsp)
 ; X64-NEXT:          retq
 ;
@@ -446,8 +447,9 @@
 ; X86-NEXT:          lfence
 ; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
 ; X86-NEXT:          .p2align        4, 0x90
-; X86-NEXT:  [[CALL_TARGET]]:                        # Block address taken
+; X86-NEXT:  {{.*}}                                  # Block address taken
 ; X86-NEXT:                                          # %entry
+; X86-NEXT:  [[CALL_TARGET]]:
 ; X86-NEXT:          movl    %eax, (%esp)
 ; X86-NEXT:          retl
 ;
@@ -464,8 +466,9 @@
 ; X86-NEXT:          lfence
 ; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
 ; X86-NEXT:          .p2align        4, 0x90
-; X86-NEXT:  [[CALL_TARGET]]:                        # Block address taken
+; X86-NEXT:  {{.*}}                                  # Block address taken
 ; X86-NEXT:                                          # %entry
+; X86-NEXT:  [[CALL_TARGET]]:
 ; X86-NEXT:          movl    %ecx, (%esp)
 ; X86-NEXT:          retl
 ;
@@ -482,8 +485,9 @@
 ; X86-NEXT:          lfence
 ; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
 ; X86-NEXT:          .p2align        4, 0x90
-; X86-NEXT:  [[CALL_TARGET]]:                        # Block address taken
+; X86-NEXT:  {{.*}}                                  # Block address taken
 ; X86-NEXT:                                          # %entry
+; X86-NEXT:  [[CALL_TARGET]]:
 ; X86-NEXT:          movl    %edx, (%esp)
 ; X86-NEXT:          retl
 ;
@@ -500,8 +504,9 @@
 ; X86-NEXT:          lfence
 ; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
 ; X86-NEXT:          .p2align        4, 0x90
-; X86-NEXT:  [[CALL_TARGET]]:                        # Block address taken
+; X86-NEXT:  {{.*}}                                  # Block address taken
 ; X86-NEXT:                                          # %entry
+; X86-NEXT:  [[CALL_TARGET]]:
 ; X86-NEXT:          movl    %edi, (%esp)
 ; X86-NEXT:          retl