Index: llvm/lib/Target/X86/X86RetpolineThunks.cpp =================================================================== --- llvm/lib/Target/X86/X86RetpolineThunks.cpp +++ llvm/lib/Target/X86/X86RetpolineThunks.cpp @@ -74,7 +74,7 @@ void createThunkFunction(Module &M, StringRef Name); void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg); - void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None); + void populateThunk(MachineFunction &MF, unsigned Reg); }; } // end anonymous namespace @@ -236,25 +236,33 @@ } void X86RetpolineThunks::populateThunk(MachineFunction &MF, - Optional<unsigned> Reg) { + unsigned Reg) { // Set MF properties. We never use vregs... MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); + // Grab the entry MBB and erase any other blocks. O0 codegen appears to + // generate two bbs for the entry block. MachineBasicBlock *Entry = &MF.front(); Entry->clear(); + while (MF.size() > 1) + MF.erase(std::next(MF.begin())); MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock()); MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock()); + MCSymbol *TargetSym = MF.getContext().createTempSymbol(); MF.push_back(CaptureSpec); MF.push_back(CallTarget); const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL; - BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addMBB(CallTarget); - Entry->addSuccessor(CallTarget); + Entry->addLiveIn(Reg); + BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym); + + // The MIR verifier thinks that the CALL in the entry block will fall through + // to CaptureSpec, so mark it as the successor. Technically, CallTarget is + // the successor, but the MIR verifier doesn't know how to cope with that. 
Entry->addSuccessor(CaptureSpec); - CallTarget->setHasAddressTaken(); // In the capture loop for speculation, we want to stop the processor from // speculating as fast as possible. On Intel processors, the PAUSE instruction @@ -270,7 +278,10 @@ CaptureSpec->setHasAddressTaken(); CaptureSpec->addSuccessor(CaptureSpec); + CallTarget->addLiveIn(Reg); + CallTarget->setHasAddressTaken(); CallTarget->setAlignment(4); - insertRegReturnAddrClobber(*CallTarget, *Reg); + insertRegReturnAddrClobber(*CallTarget, Reg); + CallTarget->back().setPreInstrSymbol(MF, TargetSym); BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc)); } Index: llvm/test/CodeGen/X86/retpoline.ll =================================================================== --- llvm/test/CodeGen/X86/retpoline.ll +++ llvm/test/CodeGen/X86/retpoline.ll @@ -1,8 +1,8 @@ -; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64 -; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64 +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST -; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86 -; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST +; RUN: llc -verify-machineinstrs -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86 +; RUN: llc -verify-machineinstrs -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" 
--implicit-check-not="call.*\*" --check-prefix=X86FAST declare void @bar(i32) @@ -428,8 +428,9 @@ ; X64-NEXT: lfence ; X64-NEXT: jmp [[CAPTURE_SPEC]] ; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: [[CALL_TARGET]]: # Block address taken +; X64-NEXT: {{.*}} # Block address taken ; X64-NEXT: # %entry +; X64-NEXT: [[CALL_TARGET]]: ; X64-NEXT: movq %r11, (%rsp) ; X64-NEXT: retq ; @@ -446,8 +447,9 @@ ; X86-NEXT: lfence ; X86-NEXT: jmp [[CAPTURE_SPEC]] ; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: [[CALL_TARGET]]: # Block address taken +; X86-NEXT: {{.*}} # Block address taken ; X86-NEXT: # %entry +; X86-NEXT: [[CALL_TARGET]]: ; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: retl ; @@ -464,8 +466,9 @@ ; X86-NEXT: lfence ; X86-NEXT: jmp [[CAPTURE_SPEC]] ; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: [[CALL_TARGET]]: # Block address taken +; X86-NEXT: {{.*}} # Block address taken ; X86-NEXT: # %entry +; X86-NEXT: [[CALL_TARGET]]: ; X86-NEXT: movl %ecx, (%esp) ; X86-NEXT: retl ; @@ -482,8 +485,9 @@ ; X86-NEXT: lfence ; X86-NEXT: jmp [[CAPTURE_SPEC]] ; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: [[CALL_TARGET]]: # Block address taken +; X86-NEXT: {{.*}} # Block address taken ; X86-NEXT: # %entry +; X86-NEXT: [[CALL_TARGET]]: ; X86-NEXT: movl %edx, (%esp) ; X86-NEXT: retl ; @@ -500,8 +504,9 @@ ; X86-NEXT: lfence ; X86-NEXT: jmp [[CAPTURE_SPEC]] ; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: [[CALL_TARGET]]: # Block address taken +; X86-NEXT: {{.*}} # Block address taken ; X86-NEXT: # %entry +; X86-NEXT: [[CALL_TARGET]]: ; X86-NEXT: movl %edi, (%esp) ; X86-NEXT: retl