diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -39,6 +39,7 @@
 FunctionPass *createAArch64StorePairSuppressPass();
 FunctionPass *createAArch64ExpandPseudoPass();
 FunctionPass *createAArch64SLSHardeningPass();
+FunctionPass *createAArch64IndirectThunks();
 FunctionPass *createAArch64SpeculationHardeningPass();
 FunctionPass *createAArch64LoadStoreOptimizationPass();
 FunctionPass *createAArch64SIMDInstrOptPass();
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -464,6 +464,9 @@
 def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr",
   "HardenSlsRetBr", "true",
   "Harden against straight line speculation across RET and BR instructions">;
+def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr",
+  "HardenSlsBlr", "true",
+  "Harden against straight line speculation across BLR instructions">;
 
 //===----------------------------------------------------------------------===//
 // AArch64 Processors supported.
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3270,7 +3270,7 @@
   // Issue the call.
   MachineInstrBuilder MIB;
   if (Subtarget->useSmallAddressing()) {
-    const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
+    const MCInstrDesc &II = TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : AArch64::BL);
     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
     if (Symbol)
       MIB.addSym(Symbol, 0);
@@ -3303,7 +3303,7 @@
     if (!CallReg)
       return false;
 
-    const MCInstrDesc &II = TII.get(AArch64::BLR);
+    const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
     CallReg = constrainOperandRegClass(II, CallReg, 0);
     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
   }
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1126,7 +1126,7 @@
         .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
+  BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
       .addReg(AArch64::X16, RegState::Kill)
      .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
      .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -397,6 +397,9 @@
   return false;
 }
 
+/// Return opcode to be used for indirect calls.
+unsigned getBLRCallOpcode(const MachineFunction &MF);
+
 // struct TSFlags {
 #define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
 #define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bit
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6092,7 +6092,9 @@
   } else if (LastInstrOpcode == AArch64::BL ||
-             (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
+             ((LastInstrOpcode == AArch64::BLR ||
+               LastInstrOpcode == AArch64::BLRNoIP) &&
+              !HasBTI)) {
     // FIXME: Do we need to check if the code after this uses the value of LR?
     FrameID = MachineOutlinerThunk;
     NumBytesToCreateFrame = 0;
@@ -6409,7 +6411,8 @@
   // as a tail-call. Whitelist the call instructions we know about so we
   // don't get unexpected results with call pseudo-instructions.
   auto UnknownCallOutlineType = outliner::InstrType::Illegal;
-  if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
+  if (MI.getOpcode() == AArch64::BLR ||
+      MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
     UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
 
   if (!Callee)
@@ -6557,7 +6560,8 @@
     if (Call->getOpcode() == AArch64::BL) {
       TailOpcode = AArch64::TCRETURNdi;
     } else {
-      assert(Call->getOpcode() == AArch64::BLR);
+      assert(Call->getOpcode() == AArch64::BLR ||
+             Call->getOpcode() == AArch64::BLRNoIP);
       TailOpcode = AArch64::TCRETURNriALL;
     }
     MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
@@ -6893,6 +6897,13 @@
   return get(Opc).TSFlags & AArch64::ElementSizeMask;
 }
 
+unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
+  if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
+    return AArch64::BLRNoIP;
+  else
+    return AArch64::BLR;
+}
+
 #define GET_INSTRINFO_HELPERS
 #define GET_INSTRMAP_INFO
 #include "AArch64GenInstrInfo.inc"
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -589,6 +589,8 @@
 def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
 def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
 
+def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
+def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
 // Toggles patterns which aren't beneficial in GlobalISel when we aren't
 // optimizing. This allows us to selectively use patterns without impacting
 // SelectionDAG's behaviour.
@@ -2020,9 +2022,19 @@
 def : InstAlias<"ret", (RET LR)>;
 
 let isCall = 1, Defs = [LR], Uses = [SP] in {
-def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>;
+  def BLR : BranchReg<0b0001, "blr", []>;
+  def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
+                Sched<[WriteBrReg]>,
+                PseudoInstExpansion<(BLR GPR64:$Rn)>;
 } // isCall
 
+def : Pat<(AArch64call GPR64:$Rn),
+          (BLR GPR64:$Rn)>,
+      Requires<[NoSLSBLRMitigation]>;
+def : Pat<(AArch64call GPR64noip:$Rn),
+          (BLRNoIP GPR64noip:$Rn)>,
+      Requires<[SLSBLRMitigation]>;
+
 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
 def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
 } // isBranch, isTerminator, isBarrier, isIndirectBranch
diff --git a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
--- a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
@@ -16,6 +16,7 @@
 #include "Utils/AArch64BaseInfo.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/IndirectThunks.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -57,9 +58,9 @@
 private:
   bool hardenReturnsAndBRs(MachineBasicBlock &MBB) const;
-  void insertSpeculationBarrier(MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator MBBI,
-                                DebugLoc DL) const;
+  bool hardenBLRs(MachineBasicBlock &MBB) const;
+  MachineBasicBlock &ConvertBLRToBL(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator) const;
 };
 
 } // end anonymous namespace
@@ -69,20 +70,26 @@
 INITIALIZE_PASS(AArch64SLSHardening, "aarch64-sls-hardening",
                 AARCH64_SLS_HARDENING_NAME, false, false)
 
-void AArch64SLSHardening::insertSpeculationBarrier(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    DebugLoc DL) const {
+static void insertSpeculationBarrier(const AArch64Subtarget *ST,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     DebugLoc DL,
+                                     bool AlwaysUseISBDSB = false) {
   assert(MBBI != MBB.begin() &&
         "Must not insert SpeculationBarrierEndBB as only instruction in MBB.");
   assert(std::prev(MBBI)->isBarrier() &&
         "SpeculationBarrierEndBB must only follow unconditional control flow "
        "instructions.");
   assert(std::prev(MBBI)->isTerminator() &&
-         "SpeculatoinBarrierEndBB must only follow terminators.");
-  if (ST->hasSB())
-    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SpeculationBarrierSBEndBB));
-  else
-    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SpeculationBarrierISBDSBEndBB));
+         "SpeculationBarrierEndBB must only follow terminators.");
+  const TargetInstrInfo *TII = ST->getInstrInfo();
+  unsigned BarrierOpc = ST->hasSB() && !AlwaysUseISBDSB
+                            ? AArch64::SpeculationBarrierSBEndBB
+                            : AArch64::SpeculationBarrierISBDSBEndBB;
+  if (MBBI == MBB.end() ||
+      (MBBI->getOpcode() != AArch64::SpeculationBarrierSBEndBB &&
+       MBBI->getOpcode() != AArch64::SpeculationBarrierISBDSBEndBB))
+    BuildMI(MBB, MBBI, DL, TII->get(BarrierOpc));
 }
 
 bool AArch64SLSHardening::runOnMachineFunction(MachineFunction &MF) {
@@ -91,12 +98,30 @@
   TRI = MF.getSubtarget().getRegisterInfo();
 
   bool Modified = false;
-  for (auto &MBB : MF)
+  for (auto &MBB : MF) {
     Modified |= hardenReturnsAndBRs(MBB);
+    Modified |= hardenBLRs(MBB);
+  }
 
   return Modified;
 }
 
+static bool isBLR(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case AArch64::BLR:
+  case AArch64::BLRNoIP:
+    return true;
+  case AArch64::BLRAA:
+  case AArch64::BLRAB:
+  case AArch64::BLRAAZ:
+  case AArch64::BLRABZ:
+    llvm_unreachable("Currently, LLVM's code generator does not support "
+                     "producing BLRA* instructions. Therefore, there's no "
+                     "support in this pass for those instructions.");
+  }
+  return false;
+}
+
 bool AArch64SLSHardening::hardenReturnsAndBRs(MachineBasicBlock &MBB) const {
   if (!ST->hardenSlsRetBr())
     return false;
@@ -108,7 +133,244 @@
     NextMBBI = std::next(MBBI);
     if (MI.isReturn() || isIndirectBranchOpcode(MI.getOpcode())) {
       assert(MI.isTerminator());
-      insertSpeculationBarrier(MBB, std::next(MBBI), MI.getDebugLoc());
+      insertSpeculationBarrier(ST, MBB, std::next(MBBI), MI.getDebugLoc());
+      Modified = true;
+    }
+  }
+  return Modified;
+}
+
+static const char SLSBLRNamePrefix[] = "__llvm_slsblr_thunk_";
+
+static std::array<const char *, 29> SLSBLRThunkNames{
+    "__llvm_slsblr_thunk_x0",  "__llvm_slsblr_thunk_x1",
+    "__llvm_slsblr_thunk_x2",  "__llvm_slsblr_thunk_x3",
+    "__llvm_slsblr_thunk_x4",  "__llvm_slsblr_thunk_x5",
+    "__llvm_slsblr_thunk_x6",  "__llvm_slsblr_thunk_x7",
+    "__llvm_slsblr_thunk_x8",  "__llvm_slsblr_thunk_x9",
+    "__llvm_slsblr_thunk_x10", "__llvm_slsblr_thunk_x11",
+    "__llvm_slsblr_thunk_x12", "__llvm_slsblr_thunk_x13",
+    "__llvm_slsblr_thunk_x14", "__llvm_slsblr_thunk_x15",
+    // X16 and X17 are deliberately missing, as the mitigation requires those
+    // registers not to be used in BLR. See comment in ConvertBLRToBL for more
+    // details.
+    "__llvm_slsblr_thunk_x18", "__llvm_slsblr_thunk_x19",
+    "__llvm_slsblr_thunk_x20", "__llvm_slsblr_thunk_x21",
+    "__llvm_slsblr_thunk_x22", "__llvm_slsblr_thunk_x23",
+    "__llvm_slsblr_thunk_x24", "__llvm_slsblr_thunk_x25",
+    "__llvm_slsblr_thunk_x26", "__llvm_slsblr_thunk_x27",
+    "__llvm_slsblr_thunk_x28", "__llvm_slsblr_thunk_x29",
+    // X30 is deliberately missing, for similar reasons as X16 and X17 are
+    // missing.
+    "__llvm_slsblr_thunk_x31",
+};
+static std::array<unsigned, 29> SLSBLRThunkRegs{
+    AArch64::X0,  AArch64::X1,  AArch64::X2,  AArch64::X3,  AArch64::X4,
+    AArch64::X5,  AArch64::X6,  AArch64::X7,  AArch64::X8,  AArch64::X9,
+    AArch64::X10, AArch64::X11, AArch64::X12, AArch64::X13, AArch64::X14,
+    AArch64::X15, AArch64::X18, AArch64::X19, AArch64::X20, AArch64::X21,
+    AArch64::X22, AArch64::X23, AArch64::X24, AArch64::X25, AArch64::X26,
+    AArch64::X27, AArch64::X28, AArch64::FP,  AArch64::XZR};
+
+namespace {
+struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> {
+  const char *getThunkPrefix() { return SLSBLRNamePrefix; }
+  bool mayUseThunk(const MachineFunction &MF) {
+    // FIXME: This could also check if there are any BLRs in the function
+    // to more accurately reflect if a thunk will be needed.
+    return MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr();
+  }
+  void insertThunks(MachineModuleInfo &MMI);
+  void populateThunk(MachineFunction &MF);
+};
+} // namespace
+
+void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+  // FIXME: It probably would be possible to filter which thunks to produce
+  // based on which registers are actually used in BLR instructions in this
+  // function. But would that be a worthwhile optimization?
+  for (StringRef Name : SLSBLRThunkNames)
+    createThunkFunction(MMI, Name);
+}
+
+void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
+  // FIXME: How to better communicate Register number, rather than through
+  // name and lookup table?
+  assert(MF.getName().startswith(getThunkPrefix()));
+  int Index = -1;
+  for (int i = 0; i < (int)SLSBLRThunkNames.size(); ++i)
+    if (MF.getName() == SLSBLRThunkNames[i]) {
+      Index = i;
+      break;
+    }
+  assert(Index != -1);
+  Register ThunkReg = SLSBLRThunkRegs[Index];
+
+  const TargetInstrInfo *TII =
+      MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+  // Grab the entry MBB and erase any other blocks. O0 codegen appears to
+  // generate two bbs for the entry block.
+  MachineBasicBlock *Entry = &MF.front();
+  Entry->clear();
+  while (MF.size() > 1)
+    MF.erase(std::next(MF.begin()));
+
+  // These thunks need to consist of the following instructions:
+  //   __llvm_slsblr_thunk_xN:
+  //       BR xN
+  //       barrierInsts
+  Entry->addLiveIn(ThunkReg);
+  BuildMI(Entry, DebugLoc(), TII->get(AArch64::BR)).addReg(ThunkReg);
+  // Make sure the thunks do not make use of the SB extension in case there is
+  // a function somewhere that will call it but, for some reason, has the SB
+  // extension disabled locally on that function, even though it's enabled for
+  // the module otherwise. Therefore set AlwaysUseISBDSB to true.
+  insertSpeculationBarrier(&MF.getSubtarget<AArch64Subtarget>(), *Entry,
+                           Entry->end(), DebugLoc(), true /*AlwaysUseISBDSB*/);
+}
+
+MachineBasicBlock &
+AArch64SLSHardening::ConvertBLRToBL(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI) const {
+  // Transform a BLR to a BL as follows:
+  // Before:
+  //   |-----------------------------|
+  //   |      ...                    |
+  //   |  instI                      |
+  //   |  BLR xN                     |
+  //   |  instJ                      |
+  //   |      ...                    |
+  //   |-----------------------------|
+  //
+  // After:
+  //   |-----------------------------|
+  //   |      ...                    |
+  //   |  instI                      |
+  //   |  BL __llvm_slsblr_thunk_xN  |
+  //   |  instJ                      |
+  //   |      ...                    |
+  //   |-----------------------------|
+  //
+  //   __llvm_slsblr_thunk_xN:
+  //   |-----------------------------|
+  //   |  BR xN                      |
+  //   |  barrierInsts               |
+  //   |-----------------------------|
+  //
+  // The __llvm_slsblr_thunk_xN thunks are created by the SLSBLRThunkInserter.
+  // This function merely needs to transform BLR xN into BL
+  // __llvm_slsblr_thunk_xN.
+  //
+  // Since linkers are allowed to clobber X16 and X17 on function calls, the
+  // above mitigation only works if the original BLR instruction was not
+  // BLR X16 nor BLR X17. Code generation before must make sure that no BLR
+  // X16|X17 was produced if the mitigation is enabled.
+
+  MachineInstr &BLR = *MBBI;
+  assert(isBLR(BLR));
+  unsigned BLOpcode;
+  Register Reg;
+  bool RegIsKilled;
+  switch (BLR.getOpcode()) {
+  case AArch64::BLR:
+  case AArch64::BLRNoIP:
+    BLOpcode = AArch64::BL;
+    Reg = BLR.getOperand(0).getReg();
+    assert(Reg != AArch64::X16 && Reg != AArch64::X17 && Reg != AArch64::LR);
+    RegIsKilled = BLR.getOperand(0).isKill();
+    break;
+  case AArch64::BLRAA:
+  case AArch64::BLRAB:
+  case AArch64::BLRAAZ:
+  case AArch64::BLRABZ:
+    llvm_unreachable("BLRA instructions cannot yet be produced by LLVM, "
+                     "therefore there is no need to support them for now.");
+  default:
+    llvm_unreachable("unhandled BLR");
+  }
+  DebugLoc DL = BLR.getDebugLoc();
+
+  // If we'd like to also support BLRAA and BLRAB instructions, we'd need
+  // a lot more different kinds of thunks.
+  // For example, a
+  //
+  //   BLRAA xN, xM
+  //
+  // instruction probably would need to be transformed to something like:
+  //
+  //   BL __llvm_slsblraa_thunk_x<N>_x<M>
+  //
+  //   __llvm_slsblraa_thunk_x<N>_x<M>:
+  //       BRAA x<N>, x<M>
+  //       barrierInsts
+  //
+  // Given that about 30 different values of N are possible and about 30
+  // different values of M are possible in the above, with the current way
+  // of producing indirect thunks, we'd be producing about 30 times 30, i.e.
+  // about 900 thunks (where most might not be actually called). This would
+  // multiply further by two to support both BLRAA and BLRAB variants of those
+  // instructions.
+  // If we'd want to support this, we'd probably need to look into a different
+  // way to produce thunk functions, based on which variants are actually
+  // needed, rather than producing all possible variants.
+  // So far, LLVM never produces BLRA* instructions, so let's leave this
+  // for the future when LLVM can start producing BLRA* instructions.
+  MachineFunction &MF = *MBBI->getMF();
+  MCContext &Context = MBB.getParent()->getContext();
+  MCSymbol *Sym = Context.getOrCreateSymbol("__llvm_slsblr_thunk_x" +
+                                            utostr(Reg - AArch64::X0));
+
+  MachineInstr *BL = BuildMI(MBB, MBBI, DL, TII->get(BLOpcode)).addSym(Sym);
+
+  // Now copy the implicit operands from BLR to BL and copy other necessary
+  // info.
+  // However, both BLR and BL instructions implicitly use SP and implicitly
+  // define LR. Blindly copying implicit operands would result in the SP and
+  // LR operands being present multiple times. While this may not be too much
+  // of an issue, let's avoid that for cleanliness, by removing those implicit
+  // operands from the BL created above before we copy over all implicit
+  // operands from the BLR.
+  int ImpLROpIdx = -1;
+  int ImpSPOpIdx = -1;
+  for (unsigned OpIdx = BL->getNumExplicitOperands();
+       OpIdx < BL->getNumOperands(); OpIdx++) {
+    MachineOperand Op = BL->getOperand(OpIdx);
+    if (!Op.isReg())
+      continue;
+    if (Op.getReg() == AArch64::LR && Op.isDef())
+      ImpLROpIdx = OpIdx;
+    if (Op.getReg() == AArch64::SP && !Op.isDef())
+      ImpSPOpIdx = OpIdx;
+  }
+  assert(ImpLROpIdx != -1);
+  assert(ImpSPOpIdx != -1);
+  int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx);
+  int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx);
+  BL->RemoveOperand(FirstOpIdxToRemove);
+  BL->RemoveOperand(SecondOpIdxToRemove);
+  // Now copy over the implicit operands from the original BLR
+  BL->copyImplicitOps(MF, BLR);
+  MF.moveCallSiteInfo(&BLR, BL);
+  // Also add the register called in the BLR as being used in the called thunk.
+  BL->addOperand(MachineOperand::CreateReg(Reg, false /*isDef*/, true /*isImp*/,
+                                           RegIsKilled /*isKill*/));
+  // Remove BLR instruction
+  MBB.erase(MBBI);
+
+  return MBB;
+}
+
+bool AArch64SLSHardening::hardenBLRs(MachineBasicBlock &MBB) const {
+  if (!ST->hardenSlsBlr())
+    return false;
+  bool Modified = false;
+  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+  MachineBasicBlock::iterator NextMBBI;
+  for (; MBBI != E; MBBI = NextMBBI) {
+    MachineInstr &MI = *MBBI;
+    NextMBBI = std::next(MBBI);
+    if (isBLR(MI)) {
+      ConvertBLRToBL(MBB, MBBI);
       Modified = true;
     }
   }
@@ -118,3 +380,60 @@
 FunctionPass *llvm::createAArch64SLSHardeningPass() {
   return new AArch64SLSHardening();
 }
+
+namespace {
+class AArch64IndirectThunks : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AArch64IndirectThunks() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override { return "AArch64 Indirect Thunks"; }
+
+  bool doInitialization(Module &M) override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineModuleInfoWrapperPass>();
+    AU.addPreserved<MachineModuleInfoWrapperPass>();
+  }
+
+private:
+  std::tuple<SLSBLRThunkInserter> TIs;
+
+  // FIXME: When LLVM moves to C++17, these can become folds
+  template <typename... ThunkInserterT>
+  static void initTIs(Module &M,
+                      std::tuple<ThunkInserterT...> &ThunkInserters) {
+    (void)std::initializer_list<int>{
+        (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
+  }
+  template <typename... ThunkInserterT>
+  static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
+                     std::tuple<ThunkInserterT...> &ThunkInserters) {
+    bool Modified = false;
+    (void)std::initializer_list<int>{
+        Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
+    return Modified;
+  }
+};
+
+} // end anonymous namespace
+
+char AArch64IndirectThunks::ID = 0;
+
+FunctionPass *llvm::createAArch64IndirectThunks() {
+  return new AArch64IndirectThunks();
+}
+
+bool AArch64IndirectThunks::doInitialization(Module &M) {
+  initTIs(M, TIs);
+  return false;
+}
+
+bool AArch64IndirectThunks::runOnMachineFunction(MachineFunction &MF) {
+  LLVM_DEBUG(dbgs() << getPassName() << '\n');
+  auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+  return runTIs(MMI, MF, TIs);
+}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -211,6 +211,7 @@
   bool UseEL3ForTP = false;
   bool AllowTaggedGlobals = false;
   bool HardenSlsRetBr = false;
+  bool HardenSlsBlr = false;
   uint8_t MaxInterleaveFactor = 2;
   uint8_t VectorInsertExtractBaseCost = 3;
   uint16_t CacheLineSize = 0;
@@ -365,6 +366,7 @@
   }
 
   bool hardenSlsRetBr() const { return HardenSlsRetBr; }
+  bool hardenSlsBlr() const { return HardenSlsBlr; }
 
   bool useEL1ForTP() const { return UseEL1ForTP; }
   bool useEL2ForTP() const { return UseEL2ForTP; }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -636,6 +636,7 @@
   // info.
   addPass(createAArch64SpeculationHardeningPass());
 
+  addPass(createAArch64IndirectThunks());
   addPass(createAArch64SLSHardeningPass());
 
   if (TM->getOptLevel() != CodeGenOpt::None) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -773,17 +773,17 @@
   return true;
 }
 
-static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect,
+static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
                               bool IsTailCall) {
   if (!IsTailCall)
-    return IsIndirect ? AArch64::BLR : AArch64::BL;
+    return IsIndirect ? getBLRCallOpcode(CallerF) : AArch64::BL;
 
   if (!IsIndirect)
     return AArch64::TCRETURNdi;
 
   // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
   // x16 or x17.
-  if (CallerF.hasFnAttribute("branch-target-enforcement"))
+  if (CallerF.getFunction().hasFnAttribute("branch-target-enforcement"))
     return AArch64::TCRETURNriBTI;
 
   return AArch64::TCRETURNri;
@@ -819,7 +819,7 @@
   if (!IsSibCall)
     CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
 
-  unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), true);
+  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
 
   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
   MIB.add(Info.Callee);
@@ -979,7 +979,7 @@
   // Create a temporarily-floating call instruction so we can add the implicit
   // uses of arg registers.
-  unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), false);
+  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
 
   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
   MIB.add(Info.Callee);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2890,7 +2890,7 @@
   // TLS calls preserve all registers except those that absolutely must be
   // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
   // silly).
-  MIB.buildInstr(AArch64::BLR, {}, {Load})
+  MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
     .addDef(AArch64::X0, RegState::Implicit)
     .addRegMask(TRI.getTLSCallPreservedMask());
 
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -55,6 +55,7 @@
 ; CHECK-NEXT: Post-RA pseudo instruction expansion pass
 ; CHECK-NEXT: AArch64 pseudo instruction expansion pass
 ; CHECK-NEXT: AArch64 speculation hardening pass
+; CHECK-NEXT: AArch64 Indirect Thunks
 ; CHECK-NEXT: AArch64 sls hardening pass
 ; CHECK-NEXT: Analyze Machine Code For Garbage Collection
 ; CHECK-NEXT: Insert fentry calls
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -178,6 +178,7 @@
 ; CHECK-NEXT: AArch64 pseudo instruction expansion pass
 ; CHECK-NEXT: AArch64 load / store optimization pass
 ; CHECK-NEXT: AArch64 speculation hardening pass
+; CHECK-NEXT: AArch64 Indirect Thunks
 ; CHECK-NEXT: AArch64 sls hardening pass
 ; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: Machine Natural Loop Construction
diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-sls-blr.mir b/llvm/test/CodeGen/AArch64/speculation-hardening-sls-blr.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/speculation-hardening-sls-blr.mir
@@ -0,0 +1,58 @@
+# RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu \
+# RUN:     -start-before aarch64-sls-hardening \
+# RUN:     -stop-after aarch64-sls-hardening -o - %s \
+# RUN:   | FileCheck %s --check-prefixes=CHECK
+
+# Check that the BLR SLS hardening transforms a BLR into a BL with operands as
+# expected.
+--- |
+  $__llvm_slsblr_thunk_x8 = comdat any
+  @a = dso_local local_unnamed_addr global i32 (...)* null, align 8
+  @b = dso_local local_unnamed_addr global i32 0, align 4
+
+  define dso_local void @fn1() local_unnamed_addr "target-features"="+harden-sls-blr" {
+  entry:
+    %0 = load i32 ()*, i32 ()** bitcast (i32 (...)** @a to i32 ()**), align 8
+    %call = tail call i32 %0() nounwind
+    store i32 %call, i32* @b, align 4
+    ret void
+  }
+
+  ; Function Attrs: naked nounwind
+  define linkonce_odr hidden void @__llvm_slsblr_thunk_x8() naked nounwind comdat {
+  entry:
+    ret void
+  }
+...
+---
+name: fn1
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: fn1
+  bb.0.entry:
+    liveins: $lr
+
+    early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store 8 into %stack.0)
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $w30, -16
+    renamable $x8 = ADRP target-flags(aarch64-page) @a
+    renamable $x8 = LDRXui killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @a :: (dereferenceable load 8 from `i32 ()** bitcast (i32 (...)** @a to i32 ()**)`)
+    BLRNoIP killed renamable $x8, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+    ; CHECK: BL <mcsymbol __llvm_slsblr_thunk_x8>, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0, implicit killed $x8
+    renamable $x8 = ADRP target-flags(aarch64-page) @b
+    STRWui killed renamable $w0, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @b :: (store 4 into @b)
+    early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load 8 from %stack.0)
+    RET undef $lr
+
+
+...
+---
+name: __llvm_slsblr_thunk_x8
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x8
+
+    BR $x8
+    SpeculationBarrierISBDSBEndBB
+...
diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-sls.ll b/llvm/test/CodeGen/AArch64/speculation-hardening-sls.ll
--- a/llvm/test/CodeGen/AArch64/speculation-hardening-sls.ll
+++ b/llvm/test/CodeGen/AArch64/speculation-hardening-sls.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mattr=harden-sls-retbr -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,ISBDSB
-; RUN: llc -mattr=harden-sls-retbr -mattr=+sb -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,SB
+; RUN: llc -mattr=harden-sls-retbr,harden-sls-blr -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,HARDEN,ISBDSB
+; RUN: llc -mattr=harden-sls-retbr,harden-sls-blr -mattr=+sb -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,HARDEN,SB
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,NOHARDEN
 
 ; Function Attrs: norecurse nounwind readnone
@@ -24,33 +25,39 @@
 ; ISBDSB-NEXT: dsb sy
 ; ISBDSB-NEXT: isb
 ; SB-NEXT: {{ sb$}}
+; CHECK-NEXT: .Lfunc_end
 }
 
 @__const.indirect_branch.ptr = private unnamed_addr constant [2 x i8*] [i8* blockaddress(@indirect_branch, %return), i8* blockaddress(@indirect_branch, %l2)], align 8
 
 ; Function Attrs: norecurse nounwind readnone
 define dso_local i32 @indirect_branch(i32 %a, i32 %b, i32 %i) {
+; CHECK-LABEL: indirect_branch:
 entry:
   %idxprom = sext i32 %i to i64
   %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @__const.indirect_branch.ptr, i64 0, i64 %idxprom
   %0 = load i8*, i8** %arrayidx, align 8
   indirectbr i8* %0, [label %return, label %l2]
+; CHECK: br x
+; ISBDSB-NEXT: dsb sy
+; ISBDSB-NEXT: isb
+; SB-NEXT: {{ sb$}}
 
 l2:                                               ; preds = %entry
   br label %return
+; CHECK: {{ret$}}
+; ISBDSB-NEXT: dsb sy
+; ISBDSB-NEXT: isb
+; SB-NEXT: {{ sb$}}
 
 return:                                           ; preds = %entry, %l2
   %retval.0 = phi i32 [ 1, %l2 ], [ 0, %entry ]
   ret i32 %retval.0
-; CHECK-LABEL: indirect_branch:
-; CHECK: br x
-; ISBDSB-NEXT: dsb sy
-; ISBDSB-NEXT: isb
-; SB-NEXT: {{ sb$}}
 ; CHECK: {{ret$}}
 ; ISBDSB-NEXT: dsb sy
 ; ISBDSB-NEXT: isb
 ; SB-NEXT: {{ sb$}}
+; CHECK-NEXT: .Lfunc_end
 }
 
 ; Check that RETAA and RETAB instructions are also protected as expected.
@@ -61,6 +68,7 @@
 ; ISBDSB-NEXT: dsb sy
 ; ISBDSB-NEXT: isb
 ; SB-NEXT: {{ sb$}}
+; CHECK-NEXT: .Lfunc_end
   ret i32 %a
 }
 
@@ -71,6 +79,7 @@
 ; ISBDSB-NEXT: dsb sy
 ; ISBDSB-NEXT: isb
 ; SB-NEXT: {{ sb$}}
+; CHECK-NEXT: .Lfunc_end
   ret i32 %a
 }
 
@@ -102,3 +111,72 @@
 ; SB-NEXT: {{ sb$}}
 ; CHECK-NEXT: .Lfunc_end
 }
+
+define dso_local i32 @indirect_call(
+i32 (...)* nocapture %f1, i32 (...)* nocapture %f2) {
+entry:
+; CHECK-LABEL: indirect_call:
+  %callee.knr.cast = bitcast i32 (...)* %f1 to i32 ()*
+  %call = tail call i32 %callee.knr.cast()
+; HARDEN: bl {{__llvm_slsblr_thunk_x[0-9]+$}}
+  %callee.knr.cast1 = bitcast i32 (...)* %f2 to i32 ()*
+  %call2 = tail call i32 %callee.knr.cast1()
+; HARDEN: bl {{__llvm_slsblr_thunk_x[0-9]+$}}
+  %add = add nsw i32 %call2, %call
+  ret i32 %add
+; CHECK: .Lfunc_end
+}
+
+; verify calling through a function pointer.
+@a = dso_local local_unnamed_addr global i32 (...)* null, align 8
+@b = dso_local local_unnamed_addr global i32 0, align 4
+define dso_local void @indirect_call_global() local_unnamed_addr {
+; CHECK-LABEL: indirect_call_global:
+entry:
+  %0 = load i32 ()*, i32 ()** bitcast (i32 (...)** @a to i32 ()**), align 8
+  %call = tail call i32 %0() nounwind
+; HARDEN: bl {{__llvm_slsblr_thunk_x[0-9]+$}}
+  store i32 %call, i32* @b, align 4
+  ret void
+; CHECK: .Lfunc_end
+}
+
+; Verify that neither x16 nor x17 are used when the BLR mitigation is enabled,
+; as a linker is allowed to clobber x16 or x17 on calls, which would break the
+; correct execution of the code sequence produced by the mitigation.
+; The below test carefully increases register pressure to persuade code
+; generation to produce a BLR x16. Yes, that is a bit fragile.
+define i64 @check_x16(i64 (i8*, i64, i64, i64, i64, i64, i64, i64)** nocapture readonly %fp, i64 (i8*, i64, i64, i64, i64, i64, i64, i64)** nocapture readonly %fp2) "target-features"="+neon,+reserve-x10,+reserve-x11,+reserve-x12,+reserve-x13,+reserve-x14,+reserve-x15,+reserve-x18,+reserve-x20,+reserve-x21,+reserve-x22,+reserve-x23,+reserve-x24,+reserve-x25,+reserve-x26,+reserve-x27,+reserve-x28,+reserve-x30,+reserve-x9" {
+entry:
+; CHECK-LABEL: check_x16:
+  %0 = load i64 (i8*, i64, i64, i64, i64, i64, i64, i64)*, i64 (i8*, i64, i64, i64, i64, i64, i64, i64)** %fp, align 8
+  %1 = bitcast i64 (i8*, i64, i64, i64, i64, i64, i64, i64)** %fp2 to i8**
+  %2 = load i8*, i8** %1, align 8
+  %call = call i64 %0(i8* %2, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0)
+  %3 = load i64 (i8*, i64, i64, i64, i64, i64, i64, i64)*, i64 (i8*, i64, i64, i64, i64, i64, i64, i64)** %fp2, align 8
+  %4 = bitcast i64 (i8*, i64, i64, i64, i64, i64, i64, i64)** %fp to i8**
+  %5 = load i8*, i8** %4, align 8;, !tbaa !2
+  %call1 = call i64 %3(i8* %5, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0)
+; NOHARDEN: blr x16
+; ISBDSB-NOT: bl __llvm_slsblr_thunk_x16
+; SB-NOT: bl __llvm_slsblr_thunk_x16
+; CHECK
+  %add = add nsw i64 %call1, %call
+  ret i64 %add
+; CHECK: .Lfunc_end
+}
+
+; HARDEN-label: __llvm_slsblr_thunk_x0:
+; HARDEN: br x0
+; ISBDSB-NEXT: dsb sy
+; ISBDSB-NEXT: isb
+; SB-NEXT: dsb sy
+; SB-NEXT: isb
+; HARDEN-NEXT: .Lfunc_end
+; HARDEN-label: __llvm_slsblr_thunk_x19:
+; HARDEN: br x19
+; ISBDSB-NEXT: dsb sy
+; ISBDSB-NEXT: isb
+; SB-NEXT: dsb sy
+; SB-NEXT: isb
+; HARDEN-NEXT: .Lfunc_end
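
For illustration, here is a sketch of the transformation the tests above pin down (the choice of x8 simply matches the .mir test; any register other than x16, x17 and x30 gets its own thunk). When building with -mattr=harden-sls-blr, an indirect call that would normally be emitted as

    blr x8

is instead emitted as a direct call to the corresponding thunk created by the "AArch64 Indirect Thunks" pass:

    bl __llvm_slsblr_thunk_x8

    __llvm_slsblr_thunk_x8:
        br x8
        dsb sy
        isb

Note that, per the AlwaysUseISBDSB flag, the thunks always end with the DSB SY + ISB pair even when +sb is available; only the RET/BR hardening inside ordinary functions uses the single sb barrier on subtargets that have it.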