Convert the X86 retpoline thunk pass from a ModulePass into a
MachineFunctionPass.

When the pass runs on a function whose name does not start with
"__llvm_retpoline_", and that function's subtarget uses retpolines without
external thunks, it inserts the IR stubs for the thunks into the module (once
per module, tracked by InsertedThunks). When the pass runs on a function whose
name does start with that prefix, it populates the function with the thunk's
machine instructions.

Index: llvm/trunk/lib/Target/X86/X86.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86.h
+++ llvm/trunk/lib/Target/X86/X86.h
@@ -108,7 +108,7 @@
 FunctionPass *createX86EvexToVexInsts();
 
 /// This pass creates the thunks for the retpoline feature.
-ModulePass *createX86RetpolineThunksPass();
+FunctionPass *createX86RetpolineThunksPass();
 
 InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
                                                   X86Subtarget &,
Index: llvm/trunk/lib/Target/X86/X86RetpolineThunks.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86RetpolineThunks.cpp
+++ llvm/trunk/lib/Target/X86/X86RetpolineThunks.cpp
@@ -38,18 +38,27 @@
 
 #define DEBUG_TYPE "x86-retpoline-thunks"
 
+static const char ThunkNamePrefix[] = "__llvm_retpoline_";
+static const char R11ThunkName[] = "__llvm_retpoline_r11";
+static const char EAXThunkName[] = "__llvm_retpoline_eax";
+static const char ECXThunkName[] = "__llvm_retpoline_ecx";
+static const char EDXThunkName[] = "__llvm_retpoline_edx";
+static const char PushThunkName[] = "__llvm_retpoline_push";
+
 namespace {
-class X86RetpolineThunks : public ModulePass {
+class X86RetpolineThunks : public MachineFunctionPass {
 public:
   static char ID;
 
-  X86RetpolineThunks() : ModulePass(ID) {}
+  X86RetpolineThunks() : MachineFunctionPass(ID) {}
 
   StringRef getPassName() const override { return "X86 Retpoline Thunks"; }
 
-  bool runOnModule(Module &M) override;
+  bool doInitialization(Module &M) override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
     AU.addRequired<MachineModuleInfo>();
     AU.addPreserved<MachineModuleInfo>();
   }
@@ -61,51 +70,74 @@
   const X86Subtarget *STI;
   const X86InstrInfo *TII;
 
-  Function *createThunkFunction(Module &M, StringRef Name);
+  bool InsertedThunks = false;
+
+  void createThunkFunction(Module &M, StringRef Name);
   void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
   void insert32BitPushReturnAddrClobber(MachineBasicBlock &MBB);
-  void createThunk(Module &M, StringRef NameSuffix,
-                   Optional<unsigned> Reg = None);
+  void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
 };
 
 } // end anonymous namespace
 
-ModulePass *llvm::createX86RetpolineThunksPass() {
+FunctionPass *llvm::createX86RetpolineThunksPass() {
   return new X86RetpolineThunks();
 }
 
 char X86RetpolineThunks::ID = 0;
 
-bool X86RetpolineThunks::runOnModule(Module &M) {
-  DEBUG(dbgs() << getPassName() << '\n');
+bool X86RetpolineThunks::doInitialization(Module &M) {
+  InsertedThunks = false;
+  return false;
+}
 
-  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
-  assert(TPC && "X86-specific target pass should not be run without a target "
-                "pass config!");
+bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << getPassName() << '\n');
 
-  MMI = &getAnalysis<MachineModuleInfo>();
-  TM = &TPC->getTM<X86TargetMachine>();
+  TM = &MF.getTarget();
+  STI = &MF.getSubtarget<X86Subtarget>();
+  TII = STI->getInstrInfo();
   Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64;
 
-  // Only add a thunk if we have at least one function that has the retpoline
-  // feature enabled in its subtarget.
-  // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
-  // nothing will end up calling it.
-  // FIXME: It's a little silly to look at every function just to enumerate
-  // the subtargets, but eventually we'll want to look at them for indirect
-  // calls, so maybe this is OK.
-  if (!llvm::any_of(M, [&](const Function &F) {
-        // Save the subtarget we find for use in emitting the subsequent
-        // thunk.
-        STI = &TM->getSubtarget<X86Subtarget>(F);
-        return STI->useRetpoline() && !STI->useRetpolineExternalThunk();
-      }))
-    return false;
+  MMI = &getAnalysis<MachineModuleInfo>();
+  Module &M = const_cast<Module &>(*MMI->getModule());
 
-  // If we have a relevant subtarget, get the instr info as well.
-  TII = STI->getInstrInfo();
+  // If this function is not a thunk, check to see if we need to insert
+  // a thunk.
+  if (!MF.getName().startswith(ThunkNamePrefix)) {
+    // If we've already inserted a thunk, nothing else to do.
+    if (InsertedThunks)
+      return false;
+
+    // Only add a thunk if one of the functions has the retpoline feature
+    // enabled in its subtarget, and doesn't enable external thunks.
+    // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
+    // nothing will end up calling it.
+    // FIXME: It's a little silly to look at every function just to enumerate
+    // the subtargets, but eventually we'll want to look at them for indirect
+    // calls, so maybe this is OK.
+    if (!STI->useRetpoline() || STI->useRetpolineExternalThunk())
+      return false;
+
+    // Otherwise, we need to insert the thunk.
+    // WARNING: This is not really a well behaving thing to do in a function
+    // pass. We extract the module and insert a new function (and machine
+    // function) directly into the module.
+    if (Is64Bit)
+      createThunkFunction(M, R11ThunkName);
+    else
+      for (StringRef Name :
+           {EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
+        createThunkFunction(M, Name);
+    InsertedThunks = true;
+    return true;
+  }
 
+  // If this *is* a thunk function, we need to populate it with the correct MI.
   if (Is64Bit) {
+    assert(MF.getName() == R11ThunkName &&
+           "Should only have an r11 thunk on 64-bit targets");
+
     // __llvm_retpoline_r11:
     //   callq .Lr11_call_target
     // .Lr11_capture_spec:
@@ -116,8 +148,7 @@
     // .Lr11_call_target:
     //   movq %r11, (%rsp)
     //   retq
-
-    createThunk(M, "r11", X86::R11);
+    populateThunk(MF, X86::R11);
   } else {
     // For 32-bit targets we need to emit a collection of thunks for various
     // possible scratch registers as well as a fallback that is used when
@@ -161,16 +192,25 @@
     //         popl    8(%esp)  # Pop RA to final RA
     //         popl    (%esp)   # Pop callee to next top of stack
     //         retl             # Ret to callee
-    createThunk(M, "eax", X86::EAX);
-    createThunk(M, "ecx", X86::ECX);
-    createThunk(M, "edx", X86::EDX);
-    createThunk(M, "push");
+    if (MF.getName() == EAXThunkName)
+      populateThunk(MF, X86::EAX);
+    else if (MF.getName() == ECXThunkName)
+      populateThunk(MF, X86::ECX);
+    else if (MF.getName() == EDXThunkName)
+      populateThunk(MF, X86::EDX);
+    else if (MF.getName() == PushThunkName)
+      populateThunk(MF);
+    else
+      llvm_unreachable("Invalid thunk name on x86-32!");
   }
 
   return true;
 }
 
-Function *X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
+void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
+  assert(Name.startswith(ThunkNamePrefix) &&
+         "Created a thunk with an unexpected prefix!");
+
   LLVMContext &Ctx = M.getContext();
   auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
   Function *F =
@@ -190,7 +230,6 @@
   IRBuilder<> Builder(Entry);
 
   Builder.CreateRetVoid();
-  return F;
 }
 
 void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
@@ -200,6 +239,7 @@
   addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0)
       .addReg(Reg);
 }
+
 void X86RetpolineThunks::insert32BitPushReturnAddrClobber(
     MachineBasicBlock &MBB) {
   // The instruction sequence we use to replace the return address without
@@ -225,21 +265,16 @@
                false, 0);
 }
 
-void X86RetpolineThunks::createThunk(Module &M, StringRef NameSuffix,
-                                     Optional<unsigned> Reg) {
-  Function &F =
-      *createThunkFunction(M, (Twine("__llvm_retpoline_") + NameSuffix).str());
-  MachineFunction &MF = MMI->getOrCreateMachineFunction(F);
-
+void X86RetpolineThunks::populateThunk(MachineFunction &MF,
+                                       Optional<unsigned> Reg) {
   // Set MF properties. We never use vregs...
   MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
 
-  BasicBlock &OrigEntryBB = F.getEntryBlock();
-  MachineBasicBlock *Entry = MF.CreateMachineBasicBlock(&OrigEntryBB);
-  MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(&OrigEntryBB);
-  MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(&OrigEntryBB);
+  MachineBasicBlock *Entry = &MF.front();
+  Entry->clear();
 
-  MF.push_back(Entry);
+  MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
+  MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
   MF.push_back(CaptureSpec);
   MF.push_back(CallTarget);
 
Index: llvm/trunk/test/CodeGen/X86/O0-pipeline.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/O0-pipeline.ll
+++ llvm/trunk/test/CodeGen/X86/O0-pipeline.ll
@@ -59,8 +59,7 @@
 ; CHECK-NEXT:       Machine Natural Loop Construction
 ; CHECK-NEXT:       Insert XRay ops
 ; CHECK-NEXT:       Implement the 'patchable-function' attribute
-; CHECK-NEXT:     X86 Retpoline Thunks
-; CHECK-NEXT:     FunctionPass Manager
+; CHECK-NEXT:       X86 Retpoline Thunks
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       MachineDominator Tree Construction