diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -355,6 +355,9 @@
 void initializeGCNNSAReassignPass(PassRegistry &);
 extern char &GCNNSAReassignID;
 
+void initializeGCNPreRABranchDistancePass(PassRegistry &);
+extern char &GCNPreRABranchDistanceID;
+
 void initializeGCNPreRAOptimizationsPass(PassRegistry &);
 extern char &GCNPreRAOptimizationsID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -422,6 +422,7 @@
   initializeAMDGPUResourceUsageAnalysisPass(*PR);
   initializeGCNNSAReassignPass(*PR);
   initializeGCNPreRAOptimizationsPass(*PR);
+  initializeGCNPreRABranchDistancePass(*PR);
   initializeGCNRewritePartialRegUsesPass(*PR);
 }
 
@@ -1349,6 +1350,8 @@
   if (!usingDefaultRegAlloc())
     report_fatal_error(RegAllocOptNotSupportedMessage);
 
+  addPass(&GCNPreRABranchDistanceID);
+
   addPass(createSGPRAllocPass(false));
 
   // Equivalent of PEI for SGPRs.
@@ -1362,6 +1365,8 @@
   if (!usingDefaultRegAlloc())
     report_fatal_error(RegAllocOptNotSupportedMessage);
 
+  addPass(&GCNPreRABranchDistanceID);
+
   addPass(createSGPRAllocPass(true));
 
   // Commit allocated register changes. This is mostly necessary because too
@@ -1487,6 +1492,10 @@
   if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))
     return true;
 
+  if (parseOptionalRegister(YamlMFI.LongBranchReservedReg,
+                            MFI->LongBranchReservedReg))
+    return true;
+
   auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
     // Create a diagnostic for a the register string literal.
     const MemoryBuffer &Buffer =
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -110,6 +110,7 @@
   GCNMinRegStrategy.cpp
   GCNNSAReassign.cpp
   GCNPreRAOptimizations.cpp
+  GCNPreRABranchDistance.cpp
   GCNRegPressure.cpp
   GCNRewritePartialRegUses.cpp
   GCNSchedStrategy.cpp
diff --git a/llvm/lib/Target/AMDGPU/GCNPreRABranchDistance.cpp b/llvm/lib/Target/AMDGPU/GCNPreRABranchDistance.cpp
new file
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNPreRABranchDistance.cpp
@@ -0,0 +1,193 @@
+//===-- GCNPreRABranchDistance.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// \brief Pass to estimate pre-RA branch size and reserve a pair of SGPRs if
+/// there is a long branch. Tuning of what is considered "long" is handled
+/// through the amdgpu-long-branch-factor cl argument, which sets
+/// LongBranchFactor.
+//===----------------------------------------------------------------------===//
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
+#include
+#include
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-pre-ra-branch-distance"
+
+namespace {
+
+static cl::opt<double> LongBranchFactor(
+    "amdgpu-long-branch-factor", cl::init(1000.0), cl::Hidden,
+    cl::desc("Factor to apply to what qualifies as a long branch "
+             "to reserve a pair of scalar registers. If this value "
+             "is 0, the long branch registers are never reserved. The "
+             "larger the value, the greater the chance the branch "
+             "distance will fall within the threshold and the registers "
+             "will be marked to be reserved. We set the value high to "
+             "lean towards always reserving a register for long jumps."));
+
+class GCNPreRABranchDistance : public MachineFunctionPass {
+  /// BasicBlockInfo - Information about the offset and size of a single
+  /// basic block.
+  struct BasicBlockInfo {
+    /// Offset - Distance from the beginning of the function to the beginning
+    /// of this basic block.
+    ///
+    /// The offset is always aligned as required by the basic block.
+    unsigned Offset = 0;
+
+    /// Size - Size of the basic block in bytes. If the block contains
+    /// inline assembly, this is a worst case estimate.
+    ///
+    /// The size does not include any alignment padding whether from the
+    /// beginning of the block, or from an aligned jump table at the end.
+    unsigned Size = 0;
+
+    BasicBlockInfo() = default;
+
+    /// Compute the offset immediately following this block. \p MBB is the
+    /// next block.
+    unsigned postOffset(const MachineBasicBlock &MBB) const {
+      const unsigned PO = Offset + Size;
+      const Align Alignment = MBB.getAlignment();
+      const Align ParentAlign = MBB.getParent()->getAlignment();
+      if (Alignment <= ParentAlign)
+        return alignTo(PO, Alignment);
+
+      // The alignment of this MBB is larger than the function's alignment, so
+      // we can't tell whether or not it will insert nops. Assume that it will.
+      return alignTo(PO, Alignment) + Alignment.value() - ParentAlign.value();
+    }
+  };
+
+  SmallVector<BasicBlockInfo> BlockInfo;
+  MachineFunction *MF = nullptr;
+  const SIInstrInfo *TII = nullptr;
+
+  void scanFunction();
+  void adjustBlockOffsets(MachineBasicBlock &Start);
+  uint64_t computeBlockSize(const MachineBasicBlock &MBB) const;
+  unsigned getInstrOffset(const MachineInstr &MI) const;
+
+public:
+  static char ID;
+
+  GCNPreRABranchDistance() : MachineFunctionPass(ID) {
+    initializeGCNPreRABranchDistancePass(*PassRegistry::getPassRegistry());
+  }
+
+  void scanFunction(MachineFunction &MF);
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "AMDGPU Pre-RA Branch Distance";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // End anonymous namespace.
+
+char GCNPreRABranchDistance::ID = 0;
+
+INITIALIZE_PASS(GCNPreRABranchDistance, DEBUG_TYPE,
+                "AMDGPU Pre-RA Branch Distance", false, false)
+
+char &llvm::GCNPreRABranchDistanceID = GCNPreRABranchDistance::ID;
+
+/// scanFunction - Do the initial scan of the function, building up
+/// information about each block.
+void GCNPreRABranchDistance::scanFunction() {
+  BlockInfo.clear();
+  BlockInfo.resize(MF->getNumBlockIDs());
+
+  // First thing, compute the size of all basic blocks, and see if the function
+  // has any inline assembly in it. If so, we have to be conservative about
+  // alignment assumptions, as we don't know for sure the size of any
+  // instructions in the inline assembly.
+  for (MachineBasicBlock &MBB : *MF)
+    BlockInfo[MBB.getNumber()].Size = computeBlockSize(MBB);
+
+  // Compute block offsets.
+  adjustBlockOffsets(*MF->begin());
+}
+
+uint64_t
+GCNPreRABranchDistance::computeBlockSize(const MachineBasicBlock &MBB) const {
+  uint64_t CodeSize = 0;
+  for (const MachineInstr &MI : MBB)
+    CodeSize += TII->getInstSizeInBytes(MI);
+  return CodeSize;
+}
+
+void GCNPreRABranchDistance::adjustBlockOffsets(MachineBasicBlock &Start) {
+  unsigned PrevNum = Start.getNumber();
+  for (auto &MBB :
+       make_range(std::next(MachineFunction::iterator(Start)), MF->end())) {
+    unsigned Num = MBB.getNumber();
+    // Get the offset at the end of the layout predecessor.
+    // Include the alignment of the current block.
+    BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(MBB);
+
+    PrevNum = Num;
+  }
+}
+
+/// getInstrOffset - Return the current offset of the specified machine
+/// instruction from the start of the function. This offset changes as stuff is
+/// moved around inside the function.
+unsigned GCNPreRABranchDistance::getInstrOffset(const MachineInstr &MI) const {
+  const MachineBasicBlock *MBB = MI.getParent();
+
+  // The offset is composed of two things: the sum of the sizes of all MBB's
+  // before this instruction's block, and the offset from the start of the
+  // block it is in.
+  unsigned Offset = BlockInfo[MBB->getNumber()].Offset;
+
+  // Sum instructions before MI in MBB.
+  for (MachineBasicBlock::const_iterator I = MBB->begin(); &*I != &MI; ++I) {
+    assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+    Offset += TII->getInstSizeInBytes(*I);
+  }
+
+  return Offset;
+}
+
+bool GCNPreRABranchDistance::runOnMachineFunction(MachineFunction &Fn) {
+  MF = &Fn;
+  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
+  TII = STM.getInstrInfo();
+  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+
+  // Do the initial scan of the function, building up information about the
+  // sizes of each block.
+  scanFunction();
+
+  for (MachineBasicBlock &MBB : *MF) {
+    MachineBasicBlock::iterator Last = MBB.getLastNonDebugInstr();
+    if (Last == MBB.end() || !Last->isUnconditionalBranch())
+      continue;
+
+    MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last);
+    int64_t DestOffset = BlockInfo[DestBB->getNumber()].Offset;
+    int64_t SrcOffset = getInstrOffset(*Last);
+    int64_t Offset =
+        static_cast<int64_t>(LongBranchFactor * (DestOffset - SrcOffset));
+    // We assume that if the scaled branch offset falls out of range here,
+    // the branch is "long" and we need to reserve the register.
+    if (!TII->isBranchOffsetInRange(Last->getOpcode(), Offset)) {
+      // For now, reserve the highest available SGPR pair. After
+      // RA, shift down to a lower unused pair of SGPRs.
+      Register Reg =
+          AMDGPU::SGPR_64RegClass.getRegister(STM.getMaxNumSGPRs(*MF) / 2 - 1);
+      MFI->setLongBranchReservedReg(Reg);
+      return true;
+    }
+  }
+  return false;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1426,6 +1426,7 @@
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  bool RegsFrozen = false;
 
   if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
     // On gfx908, we had initially reserved highest available VGPR for AGPR
@@ -1442,8 +1443,25 @@
       // identified VGPR (for AGPR copy).
       FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
       MRI.freezeReservedRegs(MF);
+      RegsFrozen = true;
     }
   }
+
+  // We initially reserved the highest available SGPR pair for long branches;
+  // now, after RA, we shift down to a lower unused one if one exists.
+  if (FuncInfo->getLongBranchReservedReg()) {
+    Register UnusedLowSGPR =
+        TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
+    FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
+    // Update the reserved registers to include the long branch one.
+    // If we've already called freezeReservedRegs above,
+    // we can avoid recomputing the whole set of reserved regs and just call
+    // reserveReg instead.
+    if (RegsFrozen)
+      MRI.reserveReg(UnusedLowSGPR, TRI);
+    else
+      MRI.freezeReservedRegs(MF);
+  }
 }
 
 // The special SGPR spills like the one needed for FP, BP or any reserved
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2538,6 +2538,7 @@
   MachineFunction *MF = MBB.getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
+  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
 
   // FIXME: Virtual register workaround for RegScavenger not working with empty
   // blocks.
@@ -2602,9 +2603,19 @@
   //   buzz;
 
   RS->enterBasicBlockEnd(MBB);
-  Register Scav = RS->scavengeRegisterBackwards(
-      AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC),
-      /* RestoreAfter */ false, 0, /* AllowSpill */ false);
+
+  Register LongBranchReservedReg = MFI->getLongBranchReservedReg();
+  Register Scav;
+
+  // If we've previously reserved a register for long branches,
+  // avoid running the scavenger and just use that register.
+  if (LongBranchReservedReg)
+    Scav = LongBranchReservedReg;
+  else
+    Scav = RS->scavengeRegisterBackwards(
+        AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC),
+        /* RestoreAfter */ false, 0, /* AllowSpill */ false);
+
   if (Scav) {
     RS->setRegUsed(Scav);
     MRI.replaceRegWith(PCReg, Scav);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -283,6 +283,7 @@
   SIMode Mode;
   std::optional<FrameIndex> ScavengeFI;
   StringValue VGPRForAGPRCopy;
+  StringValue LongBranchReservedReg;
 
   SIMachineFunctionInfo() = default;
   SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
@@ -326,6 +327,8 @@
     YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
     YamlIO.mapOptional("vgprForAGPRCopy", MFI.VGPRForAGPRCopy,
                        StringValue()); // Don't print out when it's empty.
+ YamlIO.mapOptional("longBranchReservedReg", MFI.LongBranchReservedReg, + StringValue()); } }; @@ -381,6 +384,11 @@ // base to the beginning of the new function's frame. Register StackPtrOffsetReg = AMDGPU::SP_REG; + // Registers that may be reserved when RA doesn't allocate enough + // registers to plan for the case where an indirect branch ends up + // being needed during branch relaxation. + Register LongBranchReservedReg; + AMDGPUFunctionArgInfo ArgInfo; // Graphics info. @@ -891,6 +899,8 @@ StackPtrOffsetReg = Reg; } + void setLongBranchReservedReg(Register Reg) { LongBranchReservedReg = Reg; } + // Note the unset value for this is AMDGPU::SP_REG rather than // NoRegister. This is mostly a workaround for MIR tests where state that // can't be directly computed from the function is not preserved in serialized @@ -899,6 +909,8 @@ return StackPtrOffsetReg; } + Register getLongBranchReservedReg() const { return LongBranchReservedReg; } + Register getQueuePtrUserSGPR() const { return ArgInfo.QueuePtr.getRegister(); } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -648,6 +648,8 @@ for (Register Reg : MFI.getWWMReservedRegs()) WWMReservedRegs.push_back(regToString(Reg, TRI)); + if (MFI.getLongBranchReservedReg()) + LongBranchReservedReg = regToString(MFI.getLongBranchReservedReg(), TRI); if (MFI.getVGPRForAGPRCopy()) VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI); auto SFI = MFI.getOptionalScavengeFI(); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -615,6 +615,10 @@ reserveRegisterTuples(Reserved, ScratchRSrcReg); } + Register LongBranchReservedReg = MFI->getLongBranchReservedReg(); + if (LongBranchReservedReg) + reserveRegisterTuples(Reserved, LongBranchReservedReg); + // We have to assume the SP is needed in case there are calls in the function, // which is detected after the function is lowered. If we aren't really going // to need SP, don't bother reserving it. 
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll --- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=5 -o - %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -o - %s | FileCheck %s define amdgpu_kernel void @spill(ptr addrspace(1) %arg, i32 %cnd) #0 { ; CHECK-LABEL: spill: diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll --- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 -simplifycfg-require-and-preserve-domtree=1 -amdgpu-long-branch-factor=0 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; FIXME: We should use llvm-mc for this, but we can't even parse our own output. ; See PR33579. -; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-s-branch-bits=4 -o %t.o -filetype=obj -simplifycfg-require-and-preserve-domtree=1 %s +; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-s-branch-bits=4 -amdgpu-long-branch-factor=0 -o %t.o -filetype=obj -simplifycfg-require-and-preserve-domtree=1 %s ; RUN: llvm-readobj -r %t.o | FileCheck --check-prefix=OBJ %s ; OBJ: Relocations [ diff --git a/llvm/test/CodeGen/AMDGPU/literal-constant-like-operand-instruction-size.ll b/llvm/test/CodeGen/AMDGPU/literal-constant-like-operand-instruction-size.ll --- a/llvm/test/CodeGen/AMDGPU/literal-constant-like-operand-instruction-size.ll +++ b/llvm/test/CodeGen/AMDGPU/literal-constant-like-operand-instruction-size.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -amdgpu-s-branch-bits=6 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -amdgpu-s-branch-bits=6 -amdgpu-long-branch-factor=0 < %s | FileCheck -check-prefix=GCN %s ; Restrict maximum branch to between +31 and -32 dwords diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -120,6 +120,7 @@ ; GCN-O0-NEXT: Virtual Register Map ; GCN-O0-NEXT: Live Register Matrix ; GCN-O0-NEXT: SI Pre-allocate WWM Registers +; GCN-O0-NEXT: AMDGPU Pre-RA Branch Distance ; GCN-O0-NEXT: Fast Register Allocator ; GCN-O0-NEXT: SI lower SGPR spill instructions ; GCN-O0-NEXT: Fast Register Allocator @@ -356,6 +357,7 @@ ; GCN-O1-NEXT: Live Register Matrix ; GCN-O1-NEXT: SI Pre-allocate WWM Registers ; GCN-O1-NEXT: SI optimize exec mask operations pre-RA +; GCN-O1-NEXT: AMDGPU Pre-RA Branch Distance ; GCN-O1-NEXT: Machine Natural Loop Construction ; GCN-O1-NEXT: Machine Block Frequency Analysis ; GCN-O1-NEXT: Debug Variable Analysis @@ -665,6 +667,7 @@ ; GCN-O1-OPTS-NEXT: Live Register Matrix ; GCN-O1-OPTS-NEXT: SI Pre-allocate WWM Registers ; GCN-O1-OPTS-NEXT: SI optimize exec mask operations pre-RA +; 
GCN-O1-OPTS-NEXT: AMDGPU Pre-RA Branch Distance ; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction ; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis ; GCN-O1-OPTS-NEXT: Debug Variable Analysis @@ -976,6 +979,7 @@ ; GCN-O2-NEXT: SI Pre-allocate WWM Registers ; GCN-O2-NEXT: SI optimize exec mask operations pre-RA ; GCN-O2-NEXT: SI Form memory clauses +; GCN-O2-NEXT: AMDGPU Pre-RA Branch Distance ; GCN-O2-NEXT: Machine Natural Loop Construction ; GCN-O2-NEXT: Machine Block Frequency Analysis ; GCN-O2-NEXT: Debug Variable Analysis @@ -1298,6 +1302,7 @@ ; GCN-O3-NEXT: SI Pre-allocate WWM Registers ; GCN-O3-NEXT: SI optimize exec mask operations pre-RA ; GCN-O3-NEXT: SI Form memory clauses +; GCN-O3-NEXT: AMDGPU Pre-RA Branch Distance ; GCN-O3-NEXT: Machine Natural Loop Construction ; GCN-O3-NEXT: Machine Block Frequency Analysis ; GCN-O3-NEXT: Debug Variable Analysis diff --git a/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll b/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll @@ -0,0 +1,330 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-s-branch-bits=4 -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s + +; OBJ: Relocations [ +; OBJ-NEXT: ] + +; Used to emit an always 4 byte instruction. Inline asm always assumes +; each instruction is the maximum size. +declare void @llvm.amdgcn.s.sleep(i32) #0 + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + + +define amdgpu_kernel void @uniform_conditional_max_short_forward_branch(ptr addrspace(1) %arg, i32 %cnd) #0 { +; GCN-LABEL: uniform_conditional_max_short_forward_branch: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dword s2, s[0:1], 0xb +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cmp_eq_u32 s2, 0 +; GCN-NEXT: s_cbranch_scc1 .LBB0_2 +; GCN-NEXT: ; %bb.1: ; %bb2 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_sleep 0 +; GCN-NEXT: .LBB0_2: ; %bb3 +; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_endpgm +bb: + %cmp = icmp eq i32 %cnd, 0 + br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch + +bb2: +; 24 bytes + call void asm sideeffect + "v_nop_e64 + v_nop_e64 + v_nop_e64", ""() #0 + call void @llvm.amdgcn.s.sleep(i32 0) + br label %bb3 + +bb3: + store volatile i32 %cnd, ptr addrspace(1) %arg + ret void +} + +define amdgpu_kernel void @uniform_conditional_min_long_forward_branch(ptr addrspace(1) %arg, i32 %cnd) #0 { +; GCN-LABEL: uniform_conditional_min_long_forward_branch: +; GCN: ; %bb.0: ; %bb0 +; GCN-NEXT: s_load_dword s2, s[0:1], 0xb +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cmp_eq_u32 s2, 0 +; GCN-NEXT: s_cbranch_scc0 .LBB1_1 +; GCN-NEXT: .LBB1_3: ; %bb0 +; GCN-NEXT: s_getpc_b64 s[8:9] +; GCN-NEXT: .Lpost_getpc0: +; GCN-NEXT: s_add_u32 s8, s8, (.LBB1_2-.Lpost_getpc0)&4294967295 +; GCN-NEXT: s_addc_u32 s9, s9, (.LBB1_2-.Lpost_getpc0)>>32 +; GCN-NEXT: s_setpc_b64 s[8:9] +; GCN-NEXT: .LBB1_1: ; %bb2 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: .LBB1_2: ; %bb3 +; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; 
GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_endpgm +bb0: + %cmp = icmp eq i32 %cnd, 0 + br i1 %cmp, label %bb3, label %bb2 ; +9 dword branch + +bb2: +; 32 bytes + call void asm sideeffect + "v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64", ""() #0 + br label %bb3 + +bb3: + store volatile i32 %cnd, ptr addrspace(1) %arg + ret void +} + +define amdgpu_kernel void @uniform_conditional_min_long_forward_vcnd_branch(ptr addrspace(1) %arg, float %cnd) #0 { +; GCN-LABEL: uniform_conditional_min_long_forward_vcnd_branch: +; GCN: ; %bb.0: ; %bb0 +; GCN-NEXT: s_load_dword s2, s[0:1], 0xb +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_cmp_eq_f32_e64 s[4:5], s2, 0 +; GCN-NEXT: s_and_b64 vcc, exec, s[4:5] +; GCN-NEXT: s_cbranch_vccz .LBB2_1 +; GCN-NEXT: .LBB2_3: ; %bb0 +; GCN-NEXT: s_getpc_b64 s[8:9] +; GCN-NEXT: .Lpost_getpc1: +; GCN-NEXT: s_add_u32 s8, s8, (.LBB2_2-.Lpost_getpc1)&4294967295 +; GCN-NEXT: s_addc_u32 s9, s9, (.LBB2_2-.Lpost_getpc1)>>32 +; GCN-NEXT: s_setpc_b64 s[8:9] +; GCN-NEXT: .LBB2_1: ; %bb2 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; 32 bytes +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: .LBB2_2: ; %bb3 +; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_endpgm +bb0: + %cmp = fcmp oeq float %cnd, 0.0 + br i1 %cmp, label %bb3, label %bb2 ; + 8 dword branch + +bb2: + call void asm sideeffect " ; 32 bytes + v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64", ""() #0 + br label %bb3 + +bb3: + store volatile float %cnd, ptr addrspace(1) %arg + ret void +} + +define amdgpu_kernel void @min_long_forward_vbranch(ptr addrspace(1) %arg) #0 { +; GCN-LABEL: min_long_forward_vbranch: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_add_i32_e32 v0, vcc, s0, v0 +; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GCN-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GCN-NEXT: s_cbranch_execnz .LBB3_1 +; GCN-NEXT: .LBB3_3: ; %bb +; GCN-NEXT: s_getpc_b64 s[4:5] +; GCN-NEXT: .Lpost_getpc2: +; GCN-NEXT: s_add_u32 s4, s4, (.LBB3_2-.Lpost_getpc2)&4294967295 +; GCN-NEXT: s_addc_u32 s5, s5, (.LBB3_2-.Lpost_getpc2)>>32 +; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: .LBB3_1: ; %bb2 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; 32 bytes +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: .LBB3_2: ; %bb3 +; GCN-NEXT: s_or_b64 exec, exec, s[0:1] +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s2 +; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_endpgm +bb: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = zext i32 %tid to i64 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tid.ext + %load = load volatile i32, ptr addrspace(1) %gep + %cmp = icmp eq 
i32 %load, 0 + br i1 %cmp, label %bb3, label %bb2 ; + 8 dword branch + +bb2: + call void asm sideeffect " ; 32 bytes + v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64", ""() #0 + br label %bb3 + +bb3: + store volatile i32 %load, ptr addrspace(1) %gep + ret void +} + +define amdgpu_kernel void @long_backward_sbranch(ptr addrspace(1) %arg) #0 { +; GCN-LABEL: long_backward_sbranch: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_mov_b32 s0, 0 +; GCN-NEXT: .LBB4_1: ; %bb2 +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_add_i32 s0, s0, 1 +; GCN-NEXT: s_cmp_lt_i32 s0, 10 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_cbranch_scc0 .LBB4_2 +; GCN-NEXT: .LBB4_3: ; %bb2 +; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1 +; GCN-NEXT: s_getpc_b64 s[2:3] +; GCN-NEXT: .Lpost_getpc3: +; GCN-NEXT: s_add_u32 s2, s2, (.LBB4_1-.Lpost_getpc3)&4294967295 +; GCN-NEXT: s_addc_u32 s3, s3, (.LBB4_1-.Lpost_getpc3)>>32 +; GCN-NEXT: s_setpc_b64 s[2:3] +; GCN-NEXT: .LBB4_2: ; %bb3 +; GCN-NEXT: s_endpgm + +bb: + br label %bb2 + +bb2: + %loop.idx = phi i32 [ 0, %bb ], [ %inc, %bb2 ] + ; 24 bytes + call void asm sideeffect + "v_nop_e64 + v_nop_e64 + v_nop_e64", ""() #0 + %inc = add nsw i32 %loop.idx, 1 ; add cost 4 + %cmp = icmp slt i32 %inc, 10 ; condition cost = 8 + br i1 %cmp, label %bb2, label %bb3 ; - + +bb3: + ret void +} + +; Requires expansion of unconditional branch from %bb2 to %bb4 (and +; expansion of conditional branch from %bb to %bb3. + +define amdgpu_kernel void @uniform_unconditional_min_long_forward_branch(ptr addrspace(1) %arg, i32 %arg1) { +; GCN-LABEL: uniform_unconditional_min_long_forward_branch: +; GCN: ; %bb.0: ; %bb0 +; GCN-NEXT: s_load_dword s2, s[0:1], 0xb +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cmp_eq_u32 s2, 0 +; GCN-NEXT: s_mov_b64 s[2:3], -1 +; GCN-NEXT: s_cbranch_scc0 .LBB5_1 +; GCN-NEXT: .LBB5_7: ; %bb0 +; GCN-NEXT: s_getpc_b64 s[4:5] +; GCN-NEXT: .Lpost_getpc5: +; GCN-NEXT: s_add_u32 s4, s4, (.LBB5_4-.Lpost_getpc5)&4294967295 +; GCN-NEXT: s_addc_u32 s5, s5, (.LBB5_4-.Lpost_getpc5)>>32 +; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: .LBB5_1: ; %Flow +; GCN-NEXT: s_andn2_b64 vcc, exec, s[2:3] +; GCN-NEXT: s_cbranch_vccnz .LBB5_3 +; GCN-NEXT: .LBB5_2: ; %bb2 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 17 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: .LBB5_3: ; %bb4 +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt expcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, 63 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_endpgm +; GCN-NEXT: .LBB5_4: ; %bb3 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b64 vcc, exec +; GCN-NEXT: s_cbranch_execnz .LBB5_5 +; GCN-NEXT: .LBB5_9: ; %bb3 +; GCN-NEXT: s_getpc_b64 s[4:5] +; GCN-NEXT: .Lpost_getpc6: +; GCN-NEXT: s_add_u32 s4, s4, (.LBB5_2-.Lpost_getpc6)&4294967295 +; GCN-NEXT: s_addc_u32 s5, s5, (.LBB5_2-.Lpost_getpc6)>>32 +; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: .LBB5_5: ; %bb3 +; GCN-NEXT: s_getpc_b64 s[4:5] +; GCN-NEXT: .Lpost_getpc4: +; GCN-NEXT: s_add_u32 s4, s4, (.LBB5_3-.Lpost_getpc4)&4294967295 +; GCN-NEXT: s_addc_u32 s5, s5, (.LBB5_3-.Lpost_getpc4)>>32 +; GCN-NEXT: s_setpc_b64 s[4:5] +bb0: 
+ %tmp = icmp ne i32 %arg1, 0 + br i1 %tmp, label %bb2, label %bb3 + +bb2: + store volatile i32 17, ptr addrspace(1) undef + br label %bb4 + +bb3: + ; 32 byte asm + call void asm sideeffect + "v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64", ""() #0 + br label %bb4 + +bb4: + store volatile i32 63, ptr addrspace(1) %arg + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll @@ -39,6 +39,7 @@ ; AFTER-PEI-NEXT: occupancy: 5 ; AFTER-PEI-NEXT: scavengeFI: '%fixed-stack.0' ; AFTER-PEI-NEXT: vgprForAGPRCopy: '' +; AFTER-PEI-NEXT: longBranchReservedReg: '' ; AFTER-PEI-NEXT: body: define amdgpu_kernel void @scavenge_fi(ptr addrspace(1) %out, i32 %in) #0 { %wide.sgpr0 = call <32 x i32> asm sideeffect "; def $0", "=s" () #0 diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll @@ -0,0 +1,68 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -amdgpu-s-branch-bits=4 -stop-after=branch-relaxation -verify-machineinstrs %s -o - | FileCheck %s + +; Test that long branch reserved register is serialized through +; MIR. + +; CHECK-LABEL: {{^}}name: uniform_long_forward_branch +; CHECK: machineFunctionInfo: +; CHECK-NEXT: explicitKernArgSize: 12 +; CHECK-NEXT: maxKernArgAlign: 8 +; CHECK-NEXT: ldsSize: 0 +; CHECK-NEXT: gdsSize: 0 +; CHECK-NEXT: dynLDSAlign: 1 +; CHECK-NEXT: isEntryFunction: true +; CHECK-NEXT: noSignedZerosFPMath: false +; CHECK-NEXT: memoryBound: false +; CHECK-NEXT: waveLimiter: false +; CHECK-NEXT: hasSpilledSGPRs: false +; CHECK-NEXT: hasSpilledVGPRs: false +; CHECK-NEXT: scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' +; CHECK-NEXT: frameOffsetReg: '$fp_reg' +; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32' +; CHECK-NEXT: bytesInStackArgArea: 0 +; CHECK-NEXT: returnsVoid: true +; CHECK-NEXT: argumentInfo: +; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } +; CHECK-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } +; CHECK-NEXT: workGroupIDX: { reg: '$sgpr6' } +; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' } +; CHECK-NEXT: workItemIDX: { reg: '$vgpr0' } +; CHECK-NEXT: psInputAddr: 0 +; CHECK-NEXT: psInputEnable: 0 +; CHECK-NEXT: mode: +; CHECK-NEXT: ieee: true +; CHECK-NEXT: dx10-clamp: true +; CHECK-NEXT: fp32-input-denormals: true +; CHECK-NEXT: fp32-output-denormals: true +; CHECK-NEXT: fp64-fp16-input-denormals: true +; CHECK-NEXT: fp64-fp16-output-denormals: true +; CHECK-NEXT: BitsOf32BitAddress: 0 +; CHECK-NEXT: occupancy: 8 +; CHECK-NEXT: vgprForAGPRCopy: '' +; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3' +; CHECK-NEXT: body: +define amdgpu_kernel void @uniform_long_forward_branch(ptr addrspace(1) %arg, i32 %arg1) { +bb0: + %tmp = icmp ne i32 %arg1, 0 + br i1 %tmp, label %bb2, label %bb3 + +bb2: + store volatile i32 17, ptr addrspace(1) undef + br label %bb4 + +bb3: + ; 32 byte asm + call void asm sideeffect + "v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64", ""() #0 + br label %bb4 + +bb4: + store volatile i32 63, ptr addrspace(1) %arg + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir @@ -48,6 +48,7 @@ # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: occupancy: 8 # FULL-NEXT: vgprForAGPRCopy: '' +# FULL-NEXT: longBranchReservedReg: '' # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -148,6 +149,7 @@ # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: occupancy: 8 # FULL-NEXT: vgprForAGPRCopy: '' +# FULL-NEXT: longBranchReservedReg: '' # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -219,6 +221,7 @@ # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: occupancy: 8 # FULL-NEXT: vgprForAGPRCopy: '' +# FULL-NEXT: longBranchReservedReg: '' # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -291,6 +294,7 @@ # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: occupancy: 8 # FULL-NEXT: vgprForAGPRCopy: '' +# FULL-NEXT: longBranchReservedReg: '' # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -42,6 +42,7 @@ ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 8 ; CHECK-NEXT: vgprForAGPRCopy: '' +; CHECK-NEXT: longBranchReservedReg: '' ; CHECK-NEXT: body: define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { %gep = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %arg0 @@ -84,6 +85,7 @@ ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 10 ; CHECK-NEXT: vgprForAGPRCopy: '' +; CHECK-NEXT: longBranchReservedReg: '' ; CHECK-NEXT: body: define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) { %gep = getelementptr inbounds [128 x i32], ptr addrspace(2) @gds, i32 0, i32 %arg0 @@ -150,6 +152,7 @@ ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 8 ; CHECK-NEXT: vgprForAGPRCopy: '' +; CHECK-NEXT: longBranchReservedReg: '' ; CHECK-NEXT: body: define void @function() { ret void @@ -198,6 +201,7 @@ ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 8 ; CHECK-NEXT: vgprForAGPRCopy: '' +; CHECK-NEXT: longBranchReservedReg: '' ; CHECK-NEXT: body: define void @function_nsz() #0 { ret void