Index: llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
+++ llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
@@ -38,7 +38,7 @@
   StringRef getPassName() const override { return AARCH64_BRANCH_TARGETS_NAME; }
 
 private:
-  void addBTI(MachineBasicBlock &MBB, bool CouldCall, bool CouldJump);
+  bool addBTI(MachineBasicBlock &MBB, bool CouldCall, bool CouldJump);
 };
 } // end anonymous namespace
 
@@ -91,16 +91,14 @@
     if (MBB.hasAddressTaken() || JumpTableTargets.count(&MBB))
       CouldJump = true;
 
-    if (CouldCall || CouldJump) {
-      addBTI(MBB, CouldCall, CouldJump);
-      MadeChange = true;
-    }
+    if (CouldCall || CouldJump)
+      MadeChange |= addBTI(MBB, CouldCall, CouldJump);
   }
 
   return MadeChange;
 }
 
-void AArch64BranchTargets::addBTI(MachineBasicBlock &MBB, bool CouldCall,
+bool AArch64BranchTargets::addBTI(MachineBasicBlock &MBB, bool CouldCall,
                                   bool CouldJump) {
   LLVM_DEBUG(dbgs() << "Adding BTI " << (CouldJump ? "j" : "")
                     << (CouldCall ? "c" : "") << " to " << MBB.getName()
@@ -109,25 +107,51 @@
   const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(
       MBB.getParent()->getSubtarget().getInstrInfo());
 
-  unsigned HintNum = 32;
-  if (CouldCall)
-    HintNum |= 2;
-  if (CouldJump)
-    HintNum |= 4;
-  assert(HintNum != 32 && "No target kinds!");
-
   auto MBBI = MBB.begin();
 
-  // Skip the meta instuctions, those will be removed anyway.
+  // Skip the meta instructions, those will be removed anyway.
   for (; MBBI != MBB.end() && MBBI->isMetaInstruction(); ++MBBI)
     ;
 
+  // A block without any real instruction still needs its BTI, so the opcode
+  // checks below are only made when there is a first instruction to inspect.
+  const bool HasFirstInstr = MBBI != MBB.end();
+
+  const AArch64Subtarget &Subtarget =
+      MBB.getParent()->getSubtarget<AArch64Subtarget>();
+
   // PACI[AB]SP are implicitly BTI JC, so no BTI instruction needed there.
-  if (MBBI != MBB.end() && (MBBI->getOpcode() == AArch64::PACIASP ||
-                            MBBI->getOpcode() == AArch64::PACIBSP))
-    return;
+  if (HasFirstInstr && (MBBI->getOpcode() == AArch64::PACIASP ||
+                        MBBI->getOpcode() == AArch64::PACIBSP))
+    return false;
+
+  // PACI[AB] LR, SP signs LR like PACI[AB]SP but is not a BTI landing pad.
+  // Swap it for the landing-pad form rather than adding a separate BTI.
+  if (HasFirstInstr && Subtarget.hasV8_3aOps() &&
+      (MBBI->getOpcode() == AArch64::PACIA ||
+       MBBI->getOpcode() == AArch64::PACIB) &&
+      MBBI->getNumOperands() == 2 &&
+      MBBI->getOperand(0).getReg() == AArch64::LR &&
+      MBBI->getOperand(1).getReg() == AArch64::SP) {
+    // Carry over the debug location and flags (e.g. FrameSetup) of the
+    // instruction being replaced.
+    BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+            TII->get(MBBI->getOpcode() == AArch64::PACIA ? AArch64::PACIASP
+                                                         : AArch64::PACIBSP))
+        .setMIFlags(MBBI->getFlags());
+    MBB.erase(MBBI);
+    return true;
+  }
+
+  unsigned HintNum = 32;
+  if (CouldCall)
+    HintNum |= 2;
+  if (CouldJump)
+    HintNum |= 4;
+  assert(HintNum != 32 && "No target kinds!");
 
   BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
           TII->get(AArch64::HINT))
       .addImm(HintNum);
+  return true;
 }
Index: llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -937,14 +937,28 @@
   DebugLoc DL;
 
   if (ShouldSignReturnAddress(MF)) {
-    if (ShouldSignWithAKey(MF))
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
-          .setMIFlag(MachineInstr::FrameSetup);
-    else {
+    if (ShouldSignWithAKey(MF)) {
+      if (Subtarget.hasV8_3aOps()) {
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIA))
+            .addReg(AArch64::LR, RegState::Define)
+            .addReg(AArch64::SP, RegState::InternalRead)
+            .setMIFlag(MachineInstr::FrameSetup);
+      } else {
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
+    } else {
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
           .setMIFlag(MachineInstr::FrameSetup);
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
-          .setMIFlag(MachineInstr::FrameSetup);
+      if (Subtarget.hasV8_3aOps()) {
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIB))
+            .addReg(AArch64::LR, RegState::Define)
+            .addReg(AArch64::SP, RegState::InternalRead)
+            .setMIFlag(MachineInstr::FrameSetup);
+      } else {
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
     }
 
     unsigned CFIIndex =
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6144,6 +6144,13 @@
   // Don't outline anything used for return address signing. The outlined
   // function will get signed later if needed
   switch (MI.getOpcode()) {
+  case AArch64::PACIA:
+  case AArch64::PACIB:
+    // PACI[A|B] LR, SP is the same as PACI[A|B]SP.
+    if (MI.getNumOperands() == 2 && MI.getOperand(0).getReg() == AArch64::LR &&
+        MI.getOperand(1).getReg() == AArch64::SP)
+      return outliner::InstrType::Illegal;
+    break;
   case AArch64::PACIASP:
   case AArch64::PACIBSP:
   case AArch64::AUTIASP:
@@ -6331,20 +6338,37 @@
       DL = MBBAUT->getDebugLoc();
 
     // At the very beginning of the basic block we insert the following
-    // depending on the key type
+    // depending on the key type.
+    // With v8.3a, PACI[A|B] LR, SP is emitted instead of PACI[A|B]SP: it signs
+    // LR the same way but is not a BTI landing pad. The BTI pass converts it
+    // back to PACI[A|B]SP where a landing pad is required.
     //
     // a_key:                   b_key:
     //    PACIASP                   EMITBKEY
     //    CFI_INSTRUCTION           PACIBSP
     //                              CFI_INSTRUCTION
     if (ShouldSignReturnAddrWithAKey) {
-      BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIASP))
-          .setMIFlag(MachineInstr::FrameSetup);
+      if (Subtarget.hasV8_3aOps()) {
+        BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIA))
+            .addReg(AArch64::LR, RegState::Define)
+            .addReg(AArch64::SP, RegState::InternalRead)
+            .setMIFlag(MachineInstr::FrameSetup);
+      } else {
+        BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIASP))
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
     } else {
       BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY))
           .setMIFlag(MachineInstr::FrameSetup);
-      BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIBSP))
-          .setMIFlag(MachineInstr::FrameSetup);
+      if (Subtarget.hasV8_3aOps()) {
+        BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIB))
+            .addReg(AArch64::LR, RegState::Define)
+            .addReg(AArch64::SP, RegState::InternalRead)
+            .setMIFlag(MachineInstr::FrameSetup);
+      } else {
+        BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIBSP))
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
     }
     unsigned CFIIndex =
         MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
Index: llvm/test/CodeGen/AArch64/branch-target-enforcement-direct-calls.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/branch-target-enforcement-direct-calls.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple aarch64--none-eabi -mattr=+bti < %s | FileCheck %s --check-prefixes=CHECK,CHECK8_0
+; RUN: llc -mtriple aarch64--none-eabi -mattr=+bti -mattr=v8.3a < %s | FileCheck %s --check-prefixes=CHECK,CHECK8_3
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-arm-none-eabi"
+
+; When BTI is enabled and Armv8.3-A is available, a function that is only ever
+; called directly should have its landing pad replaced with an equivalent
+; instruction that is not a landing pad.
+
+define internal void @localfunc() #0 {
+;CHECK:  localfunc:
+;CHECK8_0:          hint    #25
+;CHECK8_3:          pacia x30, sp
+entry:
+  %l1 = alloca i32, align 4
+  ret void
+}
+
+; A global function should keep its landing pad. Note that hint #25 is paciasp.
+;CHECK: bti_enabled:
+;CHECK8_0:        hint    #25
+;CHECK8_3:        paciasp
+define void @bti_enabled() #0  {
+entry:
+  %l1 = alloca i32, align 4
+  tail call void @localfunc()
+  ret void
+}
+
+attributes #0 = { noinline  "branch-target-enforcement" "sign-return-address"="all" "sign-return-address-key"="a_key" }
\ No newline at end of file
Index: llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.ll
===================================================================
--- llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.ll
+++ llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.ll
@@ -6,7 +6,7 @@
 ; CHECK-LABEL:  foo:                                    // @foo
 ; CHECK-NEXT:   .Lfoo$local:
 ; CHECK-NEXT:   // %bb.0:                               // %entry
-; CHECK-NEXT:       paciasp
+; CHECK-NEXT:       pacia x30, sp
 ; CHECK-NOT:        OUTLINED_FUNCTION_
 ; CHECK:            retaa
 define dso_local void @foo(i32 %x) #0 {
@@ -25,7 +25,7 @@
 ; CHECK-LABEL:  bar:                                    // @bar
 ; CHECK-NEXT:   .Lbar$local:
 ; CHECK-NEXT:   // %bb.0:                               // %entry
-; CHECK-NEXT:       paciasp
+; CHECK-NEXT:       pacia x30, sp
 ; CHECK-NOT:        OUTLINED_FUNCTION_
 ; CHECK:            retaa
 define dso_local void @bar(i32 %x) #0 {
Index: llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll
===================================================================
--- llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll
+++ llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll
@@ -9,7 +9,7 @@
 ; CHECK-LABEL:      a:                                     // @a
 ; CHECK:            // %bb.0:
 ; CHECK-NEXT:               .cfi_b_key_frame
-; CHECK-NEXT:               pacibsp
+; CHECK-NEXT:               pacib   x30, sp
 ; CHECK-NEXT:               .cfi_negate_ra_state
 ; CHECK-NOT:                OUTLINED_FUNCTION_
   %1 = alloca i32, align 4
@@ -33,7 +33,7 @@
 ; CHECK-LABEL:      b:                                     // @b
 ; CHECK:            // %bb.0:
 ; CHECK-NEXT:               .cfi_b_key_frame
-; CHECK-NEXT:               pacibsp
+; CHECK-NEXT:               pacib   x30, sp
 ; CHECK-NEXT:               .cfi_negate_ra_state
 ; CHECK-NOT:                OUTLINED_FUNCTION_
   %1 = alloca i32, align 4
Index: llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-v8-3.ll
===================================================================
--- llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-v8-3.ll
+++ llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-v8-3.ll
@@ -7,7 +7,7 @@
 define void @a() #0 {
 ; CHECK-LABEL:      a:                                     // @a
 ; CHECK:            // %bb.0:
-; CHECK-NEXT:               pacibsp
+; CHECK-NEXT:               pacib x30, sp
 ; CHECK:                    bl [[OUTLINED_FUNC:OUTLINED_FUNCTION_[0-9]+]]
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
@@ -29,7 +29,7 @@
 define void @b() #0 {
 ; CHECK-LABEL:      b:                                     // @b
 ; CHECK:            // %bb.0:
-; CHECK-NEXT:               pacibsp
+; CHECK-NEXT:               pacib x30, sp
 ; CHECK:                    bl OUTLINED_FUNC
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
@@ -51,7 +51,7 @@
 define void @c() #0 {
 ; CHECK-LABEL:      c:                                     // @c
 ; CHECK:            // %bb.0:
-; CHECK-NEXT:               pacibsp
+; CHECK-NEXT:               pacib x30, sp
 ; CHECK:                    bl OUTLINED_FUNC
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
@@ -78,6 +78,6 @@
 ; CHECK:            OUTLINED_FUNC
 ; CHECK:            // %bb.0:
 ; CHECK-NEXT:               .cfi_b_key_frame
-; CHECK-NEXT:               pacibsp
+; CHECK-NEXT:               pacib x30, sp
 ; CHECK:                    retab
 ; CHECK-NOT:                auti[a,b]sp
Index: llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
+++ llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
@@ -7,7 +7,7 @@
 
 ; CHECK: @_Z3fooi
 ; CHECK-V8A: hint #25
-; CHECK-V83A: paciasp
+; CHECK-V83A: pacia x30, sp
 ; CHECK-NEXT: .cfi_negate_ra_state
 ; CHECK-NOT: .cfi_negate_ra_state
 define dso_local i32 @_Z3fooi(i32 %x) #0 {
Index: llvm/test/CodeGen/AArch64/sign-return-address.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sign-return-address.ll
+++ llvm/test/CodeGen/AArch64/sign-return-address.ll
@@ -26,7 +26,7 @@
 ; CHECK:       hint #25
 ; CHECK:       hint #29
 ; CHECK:       ret
-; CHECK-V83A:  paciasp
+; CHECK-V83A:  pacia x30, sp
 ; CHECK-V83A:  retaa
 define i32 @leaf_sign_all(i32 %x) "sign-return-address"="all" {
   ret i32 %x
@@ -34,7 +34,7 @@
 
 ; CHECK:             @leaf_clobbers_lr
 ; CHECK:             hint #25
-; CHECK-V83A:        paciasp
+; CHECK-V83A:        pacia x30, sp
 ; CHECK, CHECK-V83A: str x30, [sp, #-16]!
 ; CHECK, CHECK-V83A: ldr x30, [sp], #16
 ; CHECK:             hint #29
@@ -51,7 +51,7 @@
 ; CHECK:      hint #25
 ; CHECK:      hint #29
 ; CHECK:      ret
-; CHECK-V83A: paciasp
+; CHECK-V83A: pacia x30, sp
 ; CHECK-V83A: retaa
 define i32 @non_leaf_sign_all(i32 %x) "sign-return-address"="all" {
   %call = call i32 @foo(i32 %x)
@@ -60,7 +60,7 @@
 
 ; CHECK:             @non_leaf_sign_non_leaf
 ; CHECK:             hint #25
-; CHECK-V83A:        paciasp
+; CHECK-V83A:        pacia x30, sp
 ; CHECK, CHECK-V83A: str x30, [sp, #-16]!
 ; CHECK, CHECK-V83A: ldr x30, [sp], #16
 ; CHECK:             hint #29
@@ -72,7 +72,7 @@
 }
 
 ; CHECK-LABEL: @leaf_sign_all_v83
-; CHECK: paciasp
+; CHECK: pacia x30, sp
 ; CHECK-NOT: ret
 ; CHECK: retaa
 ; CHECK-NOT: ret
@@ -84,7 +84,7 @@
 
 ; CHECK-LABEL:       @spill_lr_and_tail_call
 ; CHECK:             hint #25
-; CHECK-V83A:        paciasp
+; CHECK-V83A:        pacia x30, sp
 ; CHECK, CHECK-V83A: str x30, [sp, #-16]!
 ; CHECK, CHECK-V83A: ldr x30, [sp], #16
 ; CHECK-V83A:        autiasp
@@ -99,7 +99,7 @@
 ; CHECK-LABEL: @leaf_sign_all_a_key
 ; CHECK:       hint #25
 ; CHECK:       hint #29
-; CHECK-V83A:  paciasp
+; CHECK-V83A:  pacia x30, sp
 ; CHECK-V83A:  retaa
 define i32 @leaf_sign_all_a_key(i32 %x) "sign-return-address"="all" "sign-return-address-key"="a_key" {
   ret i32 %x
@@ -108,14 +108,14 @@
 ; CHECK-LABEL: @leaf_sign_all_b_key
 ; CHECK:       hint #27
 ; CHECK:       hint #31
-; CHECK-V83A:  pacibsp
+; CHECK-V83A:  pacib x30, sp
 ; CHECK-V83A:  retab
 define i32 @leaf_sign_all_b_key(i32 %x) "sign-return-address"="all" "sign-return-address-key"="b_key" {
   ret i32 %x
 }
 
 ; CHECK-LABEL: @leaf_sign_all_v83_b_key
-; CHECK: pacibsp
+; CHECK: pacib x30, sp
 ; CHECK-NOT: ret
 ; CHECK: retab
 ; CHECK-NOT: ret