Index: llvm/include/llvm/CodeGen/IndirectThunks.h
===================================================================
--- llvm/include/llvm/CodeGen/IndirectThunks.h
+++ llvm/include/llvm/CodeGen/IndirectThunks.h
@@ -25,14 +25,14 @@
   Derived &getDerived() { return *static_cast<Derived *>(this); }

 protected:
-  bool InsertedThunks;
+  unsigned InsertedThunks;
   void doInitialization(Module &M) {}
   void createThunkFunction(MachineModuleInfo &MMI, StringRef Name,
                            bool Comdat = true);

 public:
   void init(Module &M) {
-    InsertedThunks = false;
+    InsertedThunks = 0;
     getDerived().doInitialization(M);
   }
   // return `true` if `MMI` or `MF` was modified
@@ -86,22 +86,19 @@
 bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) {
   // If MF is not a thunk, check to see if we need to insert a thunk.
   if (!MF.getName().startswith(getDerived().getThunkPrefix())) {
-    // If we've already inserted a thunk, nothing else to do.
-    if (InsertedThunks)
-      return false;
-
     // Only add a thunk if one of the functions has the corresponding feature
-    // enabled in its subtarget, and doesn't enable external thunks.
+    // enabled in its subtarget, and doesn't enable external thunks. The target
+    // can use InsertedThunks to detect whether relevant thunks have already
+    // been inserted.
     // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
     // nothing will end up calling it.
     // FIXME: It's a little silly to look at every function just to enumerate
     // the subtargets, but eventually we'll want to look at them for indirect
     // calls, so maybe this is OK.
-    if (!getDerived().mayUseThunk(MF))
+    if (!getDerived().mayUseThunk(MF, InsertedThunks))
       return false;

-    getDerived().insertThunks(MMI);
-    InsertedThunks = true;
+    InsertedThunks |= getDerived().insertThunks(MMI, MF);
     return true;
   }
Index: llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
+++ llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
@@ -185,13 +185,15 @@
 namespace {
 struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> {
   const char *getThunkPrefix() { return SLSBLRNamePrefix; }
-  bool mayUseThunk(const MachineFunction &MF) {
+  bool mayUseThunk(const MachineFunction &MF, unsigned InsertedThunks) {
+    if (InsertedThunks)
+      return false;
     ComdatThunks &= !MF.getSubtarget<AArch64Subtarget>().hardenSlsNoComdat();
     // FIXME: This could also check if there are any BLRs in the function
     // to more accurately reflect if a thunk will be needed.
     return MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr();
   }
-  void insertThunks(MachineModuleInfo &MMI);
+  unsigned insertThunks(MachineModuleInfo &MMI, MachineFunction &MF);
   void populateThunk(MachineFunction &MF);

 private:
@@ -199,12 +201,14 @@
 };
 } // namespace

-void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+unsigned SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI,
+                                           MachineFunction &MF) {
   // FIXME: It probably would be possible to filter which thunks to produce
   // based on which registers are actually used in BLR instructions in this
   // function. But would that be a worthwhile optimization?
   for (auto T : SLSBLRThunks)
     createThunkFunction(MMI, T.Name, ComdatThunks);
+  return 1;
 }

 void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
Index: llvm/lib/Target/ARM/ARMSLSHardening.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMSLSHardening.cpp
+++ llvm/lib/Target/ARM/ARMSLSHardening.cpp
@@ -164,13 +164,16 @@
 namespace {
 struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> {
   const char *getThunkPrefix() { return SLSBLRNamePrefix; }
-  bool mayUseThunk(const MachineFunction &MF) {
+  bool mayUseThunk(const MachineFunction &MF, unsigned InsertedThunks) {
+    if ((InsertedThunks & 0x1 && !MF.getSubtarget<ARMSubtarget>().isThumb()) ||
+        (InsertedThunks & 0x2 && MF.getSubtarget<ARMSubtarget>().isThumb()))
+      return false;
     ComdatThunks &= !MF.getSubtarget<ARMSubtarget>().hardenSlsNoComdat();
     // FIXME: This could also check if there are any indirect calls in the
     // function to more accurately reflect if a thunk will be needed.
     return MF.getSubtarget<ARMSubtarget>().hardenSlsBlr();
   }
-  void insertThunks(MachineModuleInfo &MMI);
+  unsigned insertThunks(MachineModuleInfo &MMI, MachineFunction &MF);
   void populateThunk(MachineFunction &MF);

 private:
@@ -178,12 +181,16 @@
 };
 } // namespace

-void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+unsigned SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI,
+                                           MachineFunction &MF) {
   // FIXME: It probably would be possible to filter which thunks to produce
   // based on which registers are actually used in indirect calls in this
   // function. But would that be a worthwhile optimization?
+  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
   for (auto T : SLSBLRThunks)
-    createThunkFunction(MMI, T.Name, ComdatThunks);
+    if (ST->isThumb() == T.isThumb)
+      createThunkFunction(MMI, T.Name, ComdatThunks);
+  return ST->isThumb() ? 2 : 1;
 }

 void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
Index: llvm/lib/Target/X86/X86IndirectThunks.cpp
===================================================================
--- llvm/lib/Target/X86/X86IndirectThunks.cpp
+++ llvm/lib/Target/X86/X86IndirectThunks.cpp
@@ -61,23 +61,28 @@
 namespace {
 struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> {
   const char *getThunkPrefix() { return RetpolineNamePrefix; }
-  bool mayUseThunk(const MachineFunction &MF) {
+  bool mayUseThunk(const MachineFunction &MF, unsigned InsertedThunks) {
+    if (InsertedThunks)
+      return false;
     const auto &STI = MF.getSubtarget<X86Subtarget>();
     return (STI.useRetpolineIndirectCalls() ||
             STI.useRetpolineIndirectBranches()) &&
            !STI.useRetpolineExternalThunk();
   }
-  void insertThunks(MachineModuleInfo &MMI);
+  unsigned insertThunks(MachineModuleInfo &MMI, MachineFunction &MF);
   void populateThunk(MachineFunction &MF);
 };

 struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
   const char *getThunkPrefix() { return LVIThunkNamePrefix; }
-  bool mayUseThunk(const MachineFunction &MF) {
+  bool mayUseThunk(const MachineFunction &MF, unsigned InsertedThunks) {
+    if (InsertedThunks)
+      return false;
     return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity();
   }
-  void insertThunks(MachineModuleInfo &MMI) {
+  unsigned insertThunks(MachineModuleInfo &MMI, MachineFunction &MF) {
     createThunkFunction(MMI, R11LVIThunkName);
+    return 1;
   }
   void populateThunk(MachineFunction &MF) {
     assert (MF.size() == 1);
@@ -132,13 +137,15 @@

 } // end anonymous namespace

-void RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+unsigned RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI,
+                                              MachineFunction &MF) {
   if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64)
     createThunkFunction(MMI, R11RetpolineName);
   else
     for (StringRef Name : {EAXRetpolineName, ECXRetpolineName,
                            EDXRetpolineName, EDIRetpolineName})
       createThunkFunction(MMI, Name);
+  return 1;
 }

 void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
Index: llvm/test/CodeGen/ARM/speculation-hardening-sls-boththunks.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/speculation-hardening-sls-boththunks.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mattr=harden-sls-retbr -mattr=harden-sls-blr -verify-machineinstrs -mtriple=armv8-linux-gnueabi < %s | FileCheck %s
+
+; Given both Arm and Thumb functions in the same compilation unit, we should
+; get both Arm and Thumb thunks.
+
+define i32 @test1(i32 %a, i32 %b) {
+  ret i32 %a
+}
+
+define i32 @test2(i32 %a, i32 %b) "target-features"="+thumb-mode" {
+  ret i32 %a
+}
+
+; CHECK: test1
+; CHECK: test2
+; CHECK: __llvm_slsblr_thunk_arm_sp
+; CHECK: __llvm_slsblr_thunk_thumb_sp
\ No newline at end of file
Index: llvm/test/CodeGen/ARM/speculation-hardening-sls.ll
===================================================================
--- llvm/test/CodeGen/ARM/speculation-hardening-sls.ll
+++ llvm/test/CodeGen/ARM/speculation-hardening-sls.ll
@@ -259,4 +259,5 @@
 ; SB-NEXT: isb
 ; HARDEN-NEXT: .Lfunc_end
-
+; THUMB-NOT: __llvm_slsblr_thunk_arm
+; ARM-NOT: __llvm_slsblr_thunk_thumb
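
Note on the new contract (illustration only, not part of the patch): run() now ORs whatever insertThunks() returns into InsertedThunks and hands the accumulated mask back to mayUseThunk() for every later function, which is what lets the ARM inserter emit Arm thunks (bit 0x1) and Thumb thunks (bit 0x2) independently in one module. Below is a minimal sketch of a hypothetical two-variant inserter under that contract; MyThunkInserter, the "variant-b" attribute, and the thunk names are invented for illustration and do not exist in the tree.

// Sketch of a hypothetical inserter tracking two thunk variants in
// separate bits of InsertedThunks, per the contract introduced above.
#include "llvm/CodeGen/IndirectThunks.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"

using namespace llvm;

namespace {
struct MyThunkInserter : ThunkInserter<MyThunkInserter> {
  const char *getThunkPrefix() { return "__my_thunk_"; }

  // Hypothetical predicate standing in for a real subtarget query
  // (compare ST->isThumb() in the ARM inserter above).
  static bool wantsVariantB(const MachineFunction &MF) {
    return MF.getFunction().hasFnAttribute("variant-b");
  }

  bool mayUseThunk(const MachineFunction &MF, unsigned InsertedThunks) {
    // A real inserter would also gate on its hardening feature here;
    // the bit test alone decides whether insertion is still pending.
    unsigned NeededBit = wantsVariantB(MF) ? 0x2 : 0x1;
    return !(InsertedThunks & NeededBit);
  }

  unsigned insertThunks(MachineModuleInfo &MMI, MachineFunction &MF) {
    bool B = wantsVariantB(MF);
    createThunkFunction(MMI, B ? "__my_thunk_b" : "__my_thunk_a");
    return B ? 0x2 : 0x1; // tell run() which bit is now covered
  }

  void populateThunk(MachineFunction &MF) { /* emit the thunk body */ }
};
} // namespace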