Index: llvm/include/llvm/CodeGen/IndirectThunks.h
===================================================================
--- llvm/include/llvm/CodeGen/IndirectThunks.h
+++ llvm/include/llvm/CodeGen/IndirectThunks.h
@@ -25,14 +25,14 @@
   Derived &getDerived() { return *static_cast<Derived *>(this); }

 protected:
-  bool InsertedThunks;
+  unsigned InsertedThunks;
   void doInitialization(Module &M) {}
   void createThunkFunction(MachineModuleInfo &MMI, StringRef Name,
                            bool Comdat = true);

 public:
   void init(Module &M) {
-    InsertedThunks = false;
+    InsertedThunks = 0;
     getDerived().doInitialization(M);
   }
   // return `true` if `MMI` or `MF` was modified
@@ -86,22 +86,19 @@
 bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) {
   // If MF is not a thunk, check to see if we need to insert a thunk.
   if (!MF.getName().startswith(getDerived().getThunkPrefix())) {
-    // If we've already inserted a thunk, nothing else to do.
-    if (InsertedThunks)
-      return false;
-
     // Only add a thunk if one of the functions has the corresponding feature
-    // enabled in its subtarget, and doesn't enable external thunks.
+    // enabled in its subtarget, and doesn't enable external thunks. The target
+    // can use InsertedThunks to detect whether relevant thunks have already
+    // been inserted.
     // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
     // nothing will end up calling it.
     // FIXME: It's a little silly to look at every function just to enumerate
     // the subtargets, but eventually we'll want to look at them for indirect
     // calls, so maybe this is OK.
-    if (!getDerived().mayUseThunk(MF))
+    if (!getDerived().mayUseThunk(MF, InsertedThunks))
       return false;

-    getDerived().insertThunks(MMI);
-    InsertedThunks = true;
+    InsertedThunks |= getDerived().insertThunks(MMI, MF);
     return true;
   }
Index: llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
+++ llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
@@ -185,13 +185,15 @@
 namespace {
 struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> {
   const char *getThunkPrefix() { return SLSBLRNamePrefix; }
-  bool mayUseThunk(const MachineFunction &MF) {
+  bool mayUseThunk(const MachineFunction &MF, unsigned InsertedThunks) {
+    if (InsertedThunks)
+      return false;
     ComdatThunks &= !MF.getSubtarget<AArch64Subtarget>().hardenSlsNoComdat();
     // FIXME: This could also check if there are any BLRs in the function
     // to more accurately reflect if a thunk will be needed.
     return MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr();
   }
-  void insertThunks(MachineModuleInfo &MMI);
+  unsigned insertThunks(MachineModuleInfo &MMI, MachineFunction &MF);
   void populateThunk(MachineFunction &MF);

 private:
@@ -199,12 +201,14 @@
 };
 } // namespace

-void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+unsigned SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI,
+                                           MachineFunction &MF) {
   // FIXME: It probably would be possible to filter which thunks to produce
   // based on which registers are actually used in BLR instructions in this
   // function. But would that be a worthwhile optimization?
   for (auto T : SLSBLRThunks)
     createThunkFunction(MMI, T.Name, ComdatThunks);
+  return 1;
 }

 void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
Index: llvm/lib/Target/ARM/ARMSLSHardening.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMSLSHardening.cpp
+++ llvm/lib/Target/ARM/ARMSLSHardening.cpp
@@ -164,13 +164,16 @@
 namespace {
 struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> {
   const char *getThunkPrefix() { return SLSBLRNamePrefix; }
-  bool mayUseThunk(const MachineFunction &MF) {
+  bool mayUseThunk(const MachineFunction &MF, unsigned InsertedThunks) {
+    if ((InsertedThunks & 0x1 && !MF.getSubtarget<ARMSubtarget>().isThumb()) ||
+        (InsertedThunks & 0x2 && MF.getSubtarget<ARMSubtarget>().isThumb()))
+      return false;
     ComdatThunks &= !MF.getSubtarget<ARMSubtarget>().hardenSlsNoComdat();
     // FIXME: This could also check if there are any indirect calls in the
     // function to more accurately reflect if a thunk will be needed.
     return MF.getSubtarget<ARMSubtarget>().hardenSlsBlr();
   }
-  void insertThunks(MachineModuleInfo &MMI);
+  unsigned insertThunks(MachineModuleInfo &MMI, MachineFunction &MF);
   void populateThunk(MachineFunction &MF);

 private:
@@ -178,12 +181,16 @@
 };
 } // namespace

-void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+unsigned SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI,
+                                           MachineFunction &MF) {
   // FIXME: It probably would be possible to filter which thunks to produce
   // based on which registers are actually used in indirect calls in this
   // function. But would that be a worthwhile optimization?
+  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
   for (auto T : SLSBLRThunks)
-    createThunkFunction(MMI, T.Name, ComdatThunks);
+    if (ST->isThumb() == T.isThumb)
+      createThunkFunction(MMI, T.Name, ComdatThunks);
+  return ST->isThumb() ? 2 : 1;
 }

 void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
Index: llvm/lib/Target/X86/X86IndirectThunks.cpp
===================================================================
--- llvm/lib/Target/X86/X86IndirectThunks.cpp
+++ llvm/lib/Target/X86/X86IndirectThunks.cpp
@@ -61,23 +61,28 @@
 namespace {
 struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> {
   const char *getThunkPrefix() { return RetpolineNamePrefix; }
-  bool mayUseThunk(const MachineFunction &MF) {
+  bool mayUseThunk(const MachineFunction &MF, unsigned InsertedThunks) {
+    if (InsertedThunks)
+      return false;
     const auto &STI = MF.getSubtarget<X86Subtarget>();
     return (STI.useRetpolineIndirectCalls() ||
             STI.useRetpolineIndirectBranches()) &&
            !STI.useRetpolineExternalThunk();
   }
-  void insertThunks(MachineModuleInfo &MMI);
+  unsigned insertThunks(MachineModuleInfo &MMI, MachineFunction &MF);
   void populateThunk(MachineFunction &MF);
 };

 struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
   const char *getThunkPrefix() { return LVIThunkNamePrefix; }
-  bool mayUseThunk(const MachineFunction &MF) {
+  bool mayUseThunk(const MachineFunction &MF, unsigned InsertedThunks) {
+    if (InsertedThunks)
+      return false;
     return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity();
   }
-  void insertThunks(MachineModuleInfo &MMI) {
+  unsigned insertThunks(MachineModuleInfo &MMI, MachineFunction &MF) {
     createThunkFunction(MMI, R11LVIThunkName);
+    return 1;
   }
   void populateThunk(MachineFunction &MF) {
     assert (MF.size() == 1);
@@ -132,13 +137,15 @@

 } // end anonymous namespace

-void RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+unsigned RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI,
+                                              MachineFunction &MF) {
   if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64)
     createThunkFunction(MMI, R11RetpolineName);
   else
     for (StringRef Name : {EAXRetpolineName, ECXRetpolineName,
                            EDXRetpolineName, EDIRetpolineName})
       createThunkFunction(MMI, Name);
+  return 1;
 }

 void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
Index: llvm/test/CodeGen/ARM/speculation-hardening-sls-boththunks.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/speculation-hardening-sls-boththunks.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mattr=harden-sls-retbr -mattr=harden-sls-blr -verify-machineinstrs -mtriple=armv8-linux-gnueabi < %s | FileCheck %s
+
+; Given both Arm and Thumb functions in the same compilation unit, we should
+; get both Arm and Thumb thunks.
+
+define i32 @test1(i32 %a, i32 %b) {
+  ret i32 %a
+}
+
+define i32 @test2(i32 %a, i32 %b) "target-features"="+thumb-mode" {
+  ret i32 %a
+}
+
+; CHECK: test1
+; CHECK: test2
+; CHECK: __llvm_slsblr_thunk_arm_sp
+; CHECK: __llvm_slsblr_thunk_thumb_sp
\ No newline at end of file
Index: llvm/test/CodeGen/ARM/speculation-hardening-sls.ll
===================================================================
--- llvm/test/CodeGen/ARM/speculation-hardening-sls.ll
+++ llvm/test/CodeGen/ARM/speculation-hardening-sls.ll
@@ -259,4 +259,5 @@
 ; SB-NEXT: isb
 ; HARDEN-NEXT: .Lfunc_end
-
+; THUMB-NOT: __llvm_slsblr_thunk_arm
+; ARM-NOT: __llvm_slsblr_thunk_thumb
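
Note on the new contract (illustration only, not part of the patch): run() now ORs whatever insertThunks() returns into InsertedThunks and hands the accumulated mask back to mayUseThunk() for every later function, which is what lets the ARM inserter emit Arm thunks (bit 0x1) and Thumb thunks (bit 0x2) independently in one module. Below is a minimal sketch of a hypothetical two-variant inserter under that contract; MyThunkInserter, the "variant-b" attribute, and the thunk names are invented for illustration and do not exist in the tree.

// Sketch of a hypothetical inserter tracking two thunk variants in
// separate bits of InsertedThunks, per the contract introduced above.
#include "llvm/CodeGen/IndirectThunks.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"

using namespace llvm;

namespace {
struct MyThunkInserter : ThunkInserter<MyThunkInserter> {
  const char *getThunkPrefix() { return "__my_thunk_"; }

  // Hypothetical predicate standing in for a real subtarget query
  // (compare ST->isThumb() in the ARM inserter above).
  static bool wantsVariantB(const MachineFunction &MF) {
    return MF.getFunction().hasFnAttribute("variant-b");
  }

  bool mayUseThunk(const MachineFunction &MF, unsigned InsertedThunks) {
    // A real inserter would also gate on its hardening feature here;
    // the bit test alone decides whether insertion is still pending.
    unsigned NeededBit = wantsVariantB(MF) ? 0x2 : 0x1;
    return !(InsertedThunks & NeededBit);
  }

  unsigned insertThunks(MachineModuleInfo &MMI, MachineFunction &MF) {
    bool B = wantsVariantB(MF);
    createThunkFunction(MMI, B ? "__my_thunk_b" : "__my_thunk_a");
    return B ? 0x2 : 0x1; // tell run() which bit is now covered
  }

  void populateThunk(MachineFunction &MF) { /* emit the thunk body */ }
};
} // namespace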