diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -15,8 +15,9 @@
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
 
 #include "AMDGPU.h"
-#include "AMDKernelCodeT.h"
 #include "AMDGPUHSAMetadataStreamer.h"
+#include "AMDKernelCodeT.h"
+#include "SIFunctionResourceInfoTracker.h"
 #include "SIProgramInfo.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/AsmPrinter.h"
@@ -38,32 +39,14 @@
 class AMDGPUAsmPrinter final : public AsmPrinter {
 private:
-  // Track resource usage for callee functions.
-  struct SIFunctionResourceInfo {
-    // Track the number of explicitly used VGPRs. Special registers reserved at
-    // the end are tracked separately.
-    int32_t NumVGPR = 0;
-    int32_t NumAGPR = 0;
-    int32_t NumExplicitSGPR = 0;
-    uint64_t PrivateSegmentSize = 0;
-    bool UsesVCC = false;
-    bool UsesFlatScratch = false;
-    bool HasDynamicallySizedStack = false;
-    bool HasRecursion = false;
-
-    int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const;
-    int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const;
-  };
-
   SIProgramInfo CurrentProgramInfo;
-  DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
 
   std::unique_ptr<AMDGPU::HSAMD::MetadataStreamer> HSAMetadataStream;
 
   MCCodeEmitter *DumpCodeInstEmitter = nullptr;
 
   uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
-  SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF) const;
+  std::unique_ptr<SIFunctionResourceInfo> SIFuncResourceInfo;
 
   void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
   void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
@@ -94,6 +77,7 @@
                                     const SIProgramInfo &PI) const;
 
 public:
+  std::unique_ptr<SIFunctionResourceInfoTracker> ResourceTracker;
   explicit AMDGPUAsmPrinter(TargetMachine &TM,
                             std::unique_ptr<MCStreamer> Streamer);
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -49,22 +49,6 @@
 using namespace llvm::AMDGPU;
 using namespace llvm::AMDGPU::HSAMD;
 
-// We need to tell the runtime some amount ahead of time if we don't know the
-// true stack size. Assume a smaller number if this is only due to dynamic /
-// non-entry block allocas.
-static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
-    "amdgpu-assume-external-call-stack-size",
-    cl::desc("Assumed stack use of any external call (in bytes)"),
-    cl::Hidden,
-    cl::init(16384));
-
-static cl::opt<uint32_t> AssumedStackSizeForDynamicSizeObjects(
-    "amdgpu-assume-dynamic-stack-object-size",
-    cl::desc("Assumed extra stack use if there are any "
-             "variable sized objects (in bytes)"),
-    cl::Hidden,
-    cl::init(4096));
-
 // This should get the default rounding mode from the kernel. We just set the
 // default here, but this could change if the OpenCL rounding mode pragmas are
 // used.
@@ -108,10 +92,11 @@
 AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
                                    std::unique_ptr<MCStreamer> Streamer)
     : AsmPrinter(TM, std::move(Streamer)) {
-  if (IsaInfo::hasCodeObjectV3(getGlobalSTI()))
-    HSAMetadataStream.reset(new MetadataStreamerV3());
-  else
-    HSAMetadataStream.reset(new MetadataStreamerV2());
+  ResourceTracker = std::make_unique<SIFunctionResourceInfoTracker>();
+  if (IsaInfo::hasCodeObjectV3(getGlobalSTI()))
+    HSAMetadataStream.reset(new MetadataStreamerV3());
+  else
+    HSAMetadataStream.reset(new MetadataStreamerV2());
 }
 
 StringRef AMDGPUAsmPrinter::getPassName() const {
@@ -330,7 +315,7 @@
 }
 
 bool AMDGPUAsmPrinter::doFinalization(Module &M) {
-  CallGraphResourceInfo.clear();
+  ResourceTracker->CallGraphResourceMap.clear();
 
   // Pad with s_code_end to help tools and guard against instruction prefetch
   // causing stale data in caches. Arguably this should be done by the linker,
@@ -447,11 +432,20 @@
   if (MFI->isEntryFunction()) {
     getSIProgramInfo(CurrentProgramInfo, MF);
   } else {
-    auto I = CallGraphResourceInfo.insert(
-        std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
-    SIFunctionResourceInfo &Info = I.first->second;
+    // TODO: We insert an empty resource info entry for the function, take a
+    // reference to it, and then overwrite it with what analyzeResourceUsage
+    // returns. This is hacky. Ideally, the tracker should update the entry
+    // internally, based on a flag. Recursion also needs careful handling,
+    // because we must not add a callee's resource usage to its caller when
+    // the caller *is* the callee.
+    auto I = ResourceTracker->CallGraphResourceMap.insert(
+        std::make_pair(&MF.getFunction(),
+                       std::make_unique<SIFunctionResourceInfo>()));
+    auto &Info = I.first->second;
     assert(I.second && "should only be called once per function");
-    Info = analyzeResourceUsage(MF);
+    Info = ResourceTracker->analyzeResourceUsage(MF);
   }
 
   if (STM.isAmdPalOS())
@@ -485,14 +479,13 @@
 
   if (!MFI->isEntryFunction()) {
     OutStreamer->emitRawComment(" Function info:", false);
-    SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
+    auto &Info = ResourceTracker->CallGraphResourceMap[&MF.getFunction()];
     emitCommonFunctionComments(
-        Info.NumVGPR,
-        STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(),
-        Info.getTotalNumVGPRs(STM),
-        Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
-        Info.PrivateSegmentSize,
-        getFunctionCodeSize(MF), MFI);
+        Info->getNumVGPR(),
+        STM.hasMAIInsts() ? Info->getNumAGPR() : Optional<uint32_t>(),
+        Info->getTotalNumVGPRs(STM),
+        Info->getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
+        Info->getPrivateSegmentSize(), getFunctionCodeSize(MF), MFI);
     return false;
   }
 
@@ -595,401 +588,20 @@
   return CodeSize;
 }
 
-static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
-                                  const SIInstrInfo &TII,
-                                  unsigned Reg) {
-  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
-    if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
-      return true;
-  }
-
-  return false;
-}
-
-int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
-    const GCNSubtarget &ST) const {
-  return NumExplicitSGPR +
-         IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch);
-}
-
-int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs(
-    const GCNSubtarget &ST) const {
-  return std::max(NumVGPR, NumAGPR);
-}
-
-static const Function *getCalleeFunction(const MachineOperand &Op) {
-  if (Op.isImm()) {
-    assert(Op.getImm() == 0);
-    return nullptr;
-  }
-
-  return cast<Function>(Op.getGlobal());
-}
-
-AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
-    const MachineFunction &MF) const {
-  SIFunctionResourceInfo Info;
-
-  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
-  const SIInstrInfo *TII = ST.getInstrInfo();
-  const SIRegisterInfo &TRI = TII->getRegisterInfo();
-
-  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
-                         MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI);
-
-  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
-  // instructions aren't used to access the scratch buffer. Inline assembly may
-  // need it though.
-  //
-  // If we only have implicit uses of flat_scr on flat instructions, it is not
-  // really needed.
-  if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
-      (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
-       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
-       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
-    Info.UsesFlatScratch = false;
-  }
-
-  Info.PrivateSegmentSize = FrameInfo.getStackSize();
-
-  // Assume a big number if there are any unknown sized objects.
-  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
-  if (Info.HasDynamicallySizedStack)
-    Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
-
-  if (MFI->isStackRealigned())
-    Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
-
-  Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
-                 MRI.isPhysRegUsed(AMDGPU::VCC_HI);
-
-  // If there are no calls, MachineRegisterInfo can tell us the used register
-  // count easily.
-  // A tail call isn't considered a call for MachineFrameInfo's purposes.
-  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
-    MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
-    for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
-      if (MRI.isPhysRegUsed(Reg)) {
-        HighestVGPRReg = Reg;
-        break;
-      }
-    }
-
-    if (ST.hasMAIInsts()) {
-      MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
-      for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
-        if (MRI.isPhysRegUsed(Reg)) {
-          HighestAGPRReg = Reg;
-          break;
-        }
-      }
-      Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister ? 0 :
-        TRI.getHWRegIndex(HighestAGPRReg) + 1;
-    }
-    MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
-    for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
-      if (MRI.isPhysRegUsed(Reg)) {
-        HighestSGPRReg = Reg;
-        break;
-      }
-    }
-
-    // We found the maximum register index. They start at 0, so add one to get
-    // the number of registers.
-    Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 :
-      TRI.getHWRegIndex(HighestVGPRReg) + 1;
-    Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 :
-      TRI.getHWRegIndex(HighestSGPRReg) + 1;
-
-    return Info;
-  }
-
-  int32_t MaxVGPR = -1;
-  int32_t MaxAGPR = -1;
-  int32_t MaxSGPR = -1;
-  uint64_t CalleeFrameSize = 0;
-
-  for (const MachineBasicBlock &MBB : MF) {
-    for (const MachineInstr &MI : MBB) {
-      // TODO: Check regmasks? Do they occur anywhere except calls?
-      for (const MachineOperand &MO : MI.operands()) {
-        unsigned Width = 0;
-        bool IsSGPR = false;
-        bool IsAGPR = false;
-
-        if (!MO.isReg())
-          continue;
-
-        Register Reg = MO.getReg();
-        switch (Reg) {
-        case AMDGPU::EXEC:
-        case AMDGPU::EXEC_LO:
-        case AMDGPU::EXEC_HI:
-        case AMDGPU::SCC:
-        case AMDGPU::M0:
-        case AMDGPU::SRC_SHARED_BASE:
-        case AMDGPU::SRC_SHARED_LIMIT:
-        case AMDGPU::SRC_PRIVATE_BASE:
-        case AMDGPU::SRC_PRIVATE_LIMIT:
-        case AMDGPU::SGPR_NULL:
-        case AMDGPU::MODE:
-          continue;
-
-        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
-          llvm_unreachable("src_pops_exiting_wave_id should not be used");
-
-        case AMDGPU::NoRegister:
-          assert(MI.isDebugInstr());
-          continue;
-
-        case AMDGPU::VCC:
-        case AMDGPU::VCC_LO:
-        case AMDGPU::VCC_HI:
-        case AMDGPU::VCC_LO_LO16:
-        case AMDGPU::VCC_LO_HI16:
-        case AMDGPU::VCC_HI_LO16:
-        case AMDGPU::VCC_HI_HI16:
-          Info.UsesVCC = true;
-          continue;
-
-        case AMDGPU::FLAT_SCR:
-        case AMDGPU::FLAT_SCR_LO:
-        case AMDGPU::FLAT_SCR_HI:
-          continue;
-
-        case AMDGPU::XNACK_MASK:
-        case AMDGPU::XNACK_MASK_LO:
-        case AMDGPU::XNACK_MASK_HI:
-          llvm_unreachable("xnack_mask registers should not be used");
-
-        case AMDGPU::LDS_DIRECT:
-          llvm_unreachable("lds_direct register should not be used");
-
-        case AMDGPU::TBA:
-        case AMDGPU::TBA_LO:
-        case AMDGPU::TBA_HI:
-        case AMDGPU::TMA:
-        case AMDGPU::TMA_LO:
-        case AMDGPU::TMA_HI:
-          llvm_unreachable("trap handler registers should not be used");
-
-        case AMDGPU::SRC_VCCZ:
-          llvm_unreachable("src_vccz register should not be used");
-
-        case AMDGPU::SRC_EXECZ:
-          llvm_unreachable("src_execz register should not be used");
-
-        case AMDGPU::SRC_SCC:
-          llvm_unreachable("src_scc register should not be used");
-
-        default:
-          break;
-        }
-
-        if (AMDGPU::SReg_32RegClass.contains(Reg) ||
-            AMDGPU::SReg_LO16RegClass.contains(Reg) ||
-            AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
-          assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
-                 "trap handler registers should not be used");
-          IsSGPR = true;
-          Width = 1;
-        } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
-                   AMDGPU::VGPR_LO16RegClass.contains(Reg) ||
-                   AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
-          IsSGPR = false;
-          Width = 1;
-        } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
-                   AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
-          IsSGPR = false;
-          IsAGPR = true;
-          Width = 1;
-        } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
-          assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
-                 "trap handler registers should not be used");
-          IsSGPR = true;
-          Width = 2;
-        } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
-          IsSGPR = false;
-          Width = 2;
-        } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
-          IsSGPR = false;
-          IsAGPR = true;
-          Width = 2;
-        } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
-          IsSGPR = false;
-          Width = 3;
-        } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
-          IsSGPR = true;
-          Width = 3;
-        } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
-          IsSGPR = false;
-          IsAGPR = true;
-          Width = 3;
-        } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
-          assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
-                 "trap handler registers should not be used");
-          IsSGPR = true;
-          Width = 4;
-        } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
-          IsSGPR = false;
-          Width = 4;
-        } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
-          IsSGPR = false;
-          IsAGPR = true;
-          Width = 4;
-        } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
-          IsSGPR = false;
-          Width = 5;
-        } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
-          IsSGPR = true;
-          Width = 5;
-        } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
-          IsSGPR = false;
-          IsAGPR = true;
-          Width = 5;
-        } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
-          IsSGPR = false;
-          Width = 6;
-        } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
-          IsSGPR = true;
-          Width = 6;
-        } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
-          IsSGPR = false;
-          IsAGPR = true;
-          Width = 6;
-        } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
-          assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
-                 "trap handler registers should not be used");
-          IsSGPR = true;
-          Width = 8;
-        } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
-          IsSGPR = false;
-          Width = 8;
-        } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
-          IsSGPR = false;
-          IsAGPR = true;
-          Width = 8;
-        } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
-          assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
-                 "trap handler registers should not be used");
-          IsSGPR = true;
-          Width = 16;
-        } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
-          IsSGPR = false;
-          Width = 16;
-        } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
-          IsSGPR = false;
-          IsAGPR = true;
-          Width = 16;
-        } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
-          IsSGPR = true;
-          Width = 32;
-        } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
-          IsSGPR = false;
-          Width = 32;
-        } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
-          IsSGPR = false;
-          IsAGPR = true;
-          Width = 32;
-        } else {
-          llvm_unreachable("Unknown register class");
-        }
-        unsigned HWReg = TRI.getHWRegIndex(Reg);
-        int MaxUsed = HWReg + Width - 1;
-        if (IsSGPR) {
-          MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
-        } else if (IsAGPR) {
-          MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
-        } else {
-          MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
-        }
-      }
-
-      if (MI.isCall()) {
-        // Pseudo used just to encode the underlying global. Is there a better
-        // way to track this?
-
-        const MachineOperand *CalleeOp
-          = TII->getNamedOperand(MI, AMDGPU::OpName::callee);
-
-        const Function *Callee = getCalleeFunction(*CalleeOp);
-        if (!Callee || Callee->isDeclaration()) {
-          // If this is a call to an external function, we can't do much. Make
-          // conservative guesses.
-
-          // 48 SGPRs - vcc, - flat_scr, -xnack
-          int MaxSGPRGuess =
-            47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
-          MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
-          MaxVGPR = std::max(MaxVGPR, 23);
-          MaxAGPR = std::max(MaxAGPR, 23);
-
-          CalleeFrameSize = std::max(CalleeFrameSize,
-            static_cast<uint64_t>(AssumedStackSizeForExternalCall));
-
-          Info.UsesVCC = true;
-          Info.UsesFlatScratch = ST.hasFlatAddressSpace();
-          Info.HasDynamicallySizedStack = true;
-        } else {
-          // We force CodeGen to run in SCC order, so the callee's register
-          // usage etc. should be the cumulative usage of all callees.
-
-          auto I = CallGraphResourceInfo.find(Callee);
-          if (I == CallGraphResourceInfo.end()) {
-            // Avoid crashing on undefined behavior with an illegal call to a
-            // kernel. If a callsite's calling convention doesn't match the
-            // function's, it's undefined behavior. If the callsite calling
-            // convention does match, that would have errored earlier.
-            // FIXME: The verifier shouldn't allow this.
-            if (AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
-              report_fatal_error("invalid call to entry function");
-
-            llvm_unreachable("callee should have been handled before caller");
-          }
-
-          MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
-          MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
-          MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
-          CalleeFrameSize
-            = std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
-          Info.UsesVCC |= I->second.UsesVCC;
-          Info.UsesFlatScratch |= I->second.UsesFlatScratch;
-          Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
-          Info.HasRecursion |= I->second.HasRecursion;
-        }
-
-        // FIXME: Call site could have norecurse on it
-        if (!Callee || !Callee->doesNotRecurse())
-          Info.HasRecursion = true;
-      }
-    }
-  }
-
-  Info.NumExplicitSGPR = MaxSGPR + 1;
-  Info.NumVGPR = MaxVGPR + 1;
-  Info.NumAGPR = MaxAGPR + 1;
-  Info.PrivateSegmentSize += CalleeFrameSize;
-
-  return Info;
-}
-
 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
                                         const MachineFunction &MF) {
-  SIFunctionResourceInfo Info = analyzeResourceUsage(MF);
+  auto Info = ResourceTracker->analyzeResourceUsage(MF);
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
 
-  ProgInfo.NumArchVGPR = Info.NumVGPR;
-  ProgInfo.NumAccVGPR = Info.NumAGPR;
-  ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
-  ProgInfo.NumSGPR = Info.NumExplicitSGPR;
-  ProgInfo.ScratchSize = Info.PrivateSegmentSize;
-  ProgInfo.VCCUsed = Info.UsesVCC;
-  ProgInfo.FlatUsed = Info.UsesFlatScratch;
-  ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
+  ProgInfo.NumArchVGPR = Info->getNumVGPR();
+  ProgInfo.NumAccVGPR = Info->getNumAGPR();
+  ProgInfo.NumVGPR = Info->getTotalNumVGPRs(STM);
+  ProgInfo.NumSGPR = Info->getNumExplicitSGPR();
+  ProgInfo.ScratchSize = Info->getPrivateSegmentSize();
+  ProgInfo.VCCUsed = Info->usesVCC();
+  ProgInfo.FlatUsed = Info->usesFlatScratch();
+  ProgInfo.DynamicCallStack =
+      Info->hasDynamicallySizedStack() || Info->hasRecursion();
 
   if (!isUInt<32>(ProgInfo.ScratchSize)) {
     DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -115,6 +115,7 @@
   SIInsertWaitcnts.cpp
   SIInstrInfo.cpp
   SIISelLowering.cpp
+  SIFunctionResourceInfoTracker.cpp
   SILoadStoreOptimizer.cpp
   SILowerControlFlow.cpp
   SILowerI1Copies.cpp
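
A note on the insert-then-overwrite pattern used in runOnMachineFunction above: it leans on DenseMap::insert returning an (iterator, inserted) pair. A minimal standalone sketch of that pattern follows; the Record struct and plain int key are stand-ins for the real SIFunctionResourceInfo and Function pointer, not part of the patch:

#include "llvm/ADT/DenseMap.h"
#include <cassert>
#include <memory>
#include <utility>

namespace {
struct Record { int NumVGPR = 0; }; // stand-in for SIFunctionResourceInfo

llvm::DenseMap<int, std::unique_ptr<Record>> Map; // stand-in for the tracker map

void analyzeOnce(int Key) {
  // insert() creates the entry only if the key is new; the bool in the
  // returned pair lets us assert each function is analyzed exactly once.
  auto I = Map.insert(std::make_pair(Key, std::make_unique<Record>()));
  assert(I.second && "should only be called once per function");
  auto &Info = I.first->second; // reference to the owning unique_ptr
  Info->NumVGPR = 42;           // the real code overwrites the whole entry
}
} // namespace
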
diff --git a/llvm/lib/Target/AMDGPU/SIFunctionResourceInfoTracker.h b/llvm/lib/Target/AMDGPU/SIFunctionResourceInfoTracker.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIFunctionResourceInfoTracker.h
@@ -0,0 +1,119 @@
+//===- SIFunctionResourceInfoTracker.h - Track resource usage --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// AMDGPU function resource info tracker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIFUNCTIONRESOURCEINFOTRACKER_H
+#define LLVM_LIB_TARGET_AMDGPU_SIFUNCTIONRESOURCEINFOTRACKER_H
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include <cstdint>
+#include <memory>
+
+namespace llvm {
+
+class AMDGPUMachineFunction;
+class GCNSubtarget;
+
+/// Track resource usage for callee functions. The number of explicitly used
+/// VGPRs is tracked here; special registers reserved at the end are tracked
+/// separately.
+class SIFunctionResourceInfo {
+private:
+  int32_t NumVGPR = 0;
+  int32_t NumAGPR = 0;
+  int32_t NumExplicitSGPR = 0;
+  uint64_t PrivateSegmentSize = 0;
+  bool UsesVCC = false;
+  bool UsesFlatScratch = false;
+  bool HasDynamicallySizedStack = false;
+  bool HasRecursion = false;
+  StringRef DebugName;
+
+public:
+  void setName(StringRef Name) { DebugName = Name; }
+  StringRef getName() const { return DebugName; }
+
+  void setNumVGPR(int32_t Num) { NumVGPR = Num; }
+  int32_t getNumVGPR() const { return NumVGPR; }
+
+  void setNumAGPR(int32_t Num) { NumAGPR = Num; }
+  int32_t getNumAGPR() const { return NumAGPR; }
+
+  void setNumExplicitSGPR(int32_t SGPR) { NumExplicitSGPR = SGPR; }
+  int32_t getNumExplicitSGPR() const { return NumExplicitSGPR; }
+
+  void setPrivateSegmentSize(uint64_t SegmentSize) {
+    PrivateSegmentSize = SegmentSize;
+  }
+  uint64_t getPrivateSegmentSize() const { return PrivateSegmentSize; }
+
+  void setVCCUsage(bool V) { UsesVCC = V; }
+  bool usesVCC() const { return UsesVCC; }
+
+  void setFlatScratchUsage(bool F) { UsesFlatScratch = F; }
+  bool usesFlatScratch() const { return UsesFlatScratch; }
+
+  void setDynamicallySizedStackUsage(bool Use) {
+    HasDynamicallySizedStack = Use;
+  }
+  bool hasDynamicallySizedStack() const { return HasDynamicallySizedStack; }
+
+  void setRecursionUsage(bool R) { HasRecursion = R; }
+  bool hasRecursion() const { return HasRecursion; }
+
+  int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const;
+  int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const;
+};
+
+class SIFunctionResourceInfoTracker {
+public:
+  DenseMap<const Function *, std::unique_ptr<SIFunctionResourceInfo>>
+      CallGraphResourceMap;
+
+  const Function *getCalleeFunction(const MachineOperand &Op);
+  void gatherResourceInfoFromCallGraph(
+      std::unique_ptr<SIFunctionResourceInfo> &Info,
+      const MachineFunction &MF);
+
+  int32_t getLeafFunctionVGPRsUsed(const MachineFunction &MF);
+  int32_t getLeafFunctionAGPRsUsed(const MachineFunction &MF);
+  int32_t getLeafFunctionSGPRsUsed(const MachineFunction &MF);
+
+  std::unique_ptr<SIFunctionResourceInfo>
+  analyzeResourceUsage(const MachineFunction &MF);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIFUNCTIONRESOURCEINFOTRACKER_H
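
To make the header's counting convention concrete: register counts are "highest hardware register index used, plus one", and getTotalNumSGPRs folds the reserved extra registers on top of the explicit count. A small hypothetical use of the accessors; the register numbers are illustrative only, and the exact extra-SGPR count depends on the subtarget:

#include "SIFunctionResourceInfoTracker.h"
using namespace llvm;

// Hand-populates a record the way the tracker would for a leaf function
// whose highest used registers are v23 and s29 and which uses VCC.
int32_t exampleTotalSGPRs(const GCNSubtarget &ST) {
  SIFunctionResourceInfo Info;
  Info.setNumVGPR(24);         // v23 used => index 23 + 1
  Info.setNumExplicitSGPR(30); // s29 used => index 29 + 1
  Info.setVCCUsage(true);      // VCC is folded in as a reserved "extra" SGPR
  // getTotalNumSGPRs == 30 + IsaInfo::getNumExtraSGPRs(&ST, /*VCC=*/true,
  //                                                    /*FlatScratch=*/false)
  return Info.getTotalNumSGPRs(ST);
}
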
diff --git a/llvm/lib/Target/AMDGPU/SIFunctionResourceInfoTracker.cpp b/llvm/lib/Target/AMDGPU/SIFunctionResourceInfoTracker.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIFunctionResourceInfoTracker.cpp
@@ -0,0 +1,470 @@
+//===- SIFunctionResourceInfoTracker.cpp - Track resource usage ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Implementation of the function resource tracker.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SIFunctionResourceInfoTracker.h"
+
+using namespace llvm;
+using namespace llvm::AMDGPU;
+
+// We need to tell the runtime some amount ahead of time if we don't know the
+// true stack size. Assume a smaller number if this is only due to dynamic /
+// non-entry block allocas.
+static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
+    "amdgpu-assume-external-call-stack-size",
+    cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
+    cl::init(16384));
+
+static cl::opt<uint32_t> AssumedStackSizeForDynamicSizeObjects(
+    "amdgpu-assume-dynamic-stack-object-size",
+    cl::desc("Assumed extra stack use if there are any "
+             "variable sized objects (in bytes)"),
+    cl::Hidden, cl::init(4096));
+
+static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
+                                  const SIInstrInfo &TII, unsigned Reg) {
+  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
+    if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
+      return true;
+  }
+
+  return false;
+}
+
+int32_t SIFunctionResourceInfo::getTotalNumSGPRs(const GCNSubtarget &ST) const {
+  return NumExplicitSGPR +
+         IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch);
+}
+
+int32_t SIFunctionResourceInfo::getTotalNumVGPRs(const GCNSubtarget &ST) const {
+  return std::max(NumVGPR, NumAGPR);
+}
+
+int32_t SIFunctionResourceInfoTracker::getLeafFunctionVGPRsUsed(
+    const MachineFunction &MF) {
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
+
+  assert(!MF.getFrameInfo().hasCalls() && !MF.getFrameInfo().hasTailCall() &&
+         "Should be called for leaf functions only.");
+
+  MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
+  for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
+    if (MRI.isPhysRegUsed(Reg)) {
+      HighestVGPRReg = Reg;
+      break;
+    }
+  }
+  return HighestVGPRReg == AMDGPU::NoRegister
+             ? 0
+             : TRI.getHWRegIndex(HighestVGPRReg) + 1;
+}
+
+int32_t SIFunctionResourceInfoTracker::getLeafFunctionAGPRsUsed(
+    const MachineFunction &MF) {
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
+
+  assert(!MF.getFrameInfo().hasCalls() && !MF.getFrameInfo().hasTailCall() &&
+         "Should be called for leaf functions only.");
+
+  MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
+  for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
+    if (MRI.isPhysRegUsed(Reg)) {
+      HighestAGPRReg = Reg;
+      break;
+    }
+  }
+  return HighestAGPRReg == AMDGPU::NoRegister
+             ? 0
+             : TRI.getHWRegIndex(HighestAGPRReg) + 1;
+}
+
+int32_t SIFunctionResourceInfoTracker::getLeafFunctionSGPRsUsed(
+    const MachineFunction &MF) {
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
+
+  assert(!MF.getFrameInfo().hasCalls() && !MF.getFrameInfo().hasTailCall() &&
+         "Should be called for leaf functions only.");
+
+  MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
+  for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
+    if (MRI.isPhysRegUsed(Reg)) {
+      HighestSGPRReg = Reg;
+      break;
+    }
+  }
+  return HighestSGPRReg == AMDGPU::NoRegister
+             ? 0
+             : TRI.getHWRegIndex(HighestSGPRReg) + 1;
+}
+
+const Function *
+SIFunctionResourceInfoTracker::getCalleeFunction(const MachineOperand &Op) {
+  if (Op.isImm()) {
+    assert(Op.getImm() == 0);
+    return nullptr;
+  }
+  return cast<Function>(Op.getGlobal());
+}
+
+void SIFunctionResourceInfoTracker::gatherResourceInfoFromCallGraph(
+    std::unique_ptr<SIFunctionResourceInfo> &Info, const MachineFunction &MF) {
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+
+  int32_t MaxVGPR = -1;
+  int32_t MaxAGPR = -1;
+  int32_t MaxSGPR = -1;
+  uint64_t CalleeFrameSize = 0;
+
+  for (const MachineBasicBlock &MBB : MF) {
+    for (const MachineInstr &MI : MBB) {
+      // TODO: Check regmasks? Do they occur anywhere except calls?
+      for (const MachineOperand &MO : MI.operands()) {
+        unsigned Width = 0;
+        bool IsSGPR = false;
+        bool IsAGPR = false;
+
+        if (!MO.isReg())
+          continue;
+
+        Register Reg = MO.getReg();
+        switch (Reg) {
+        case AMDGPU::EXEC:
+        case AMDGPU::EXEC_LO:
+        case AMDGPU::EXEC_HI:
+        case AMDGPU::SCC:
+        case AMDGPU::M0:
+        case AMDGPU::SRC_SHARED_BASE:
+        case AMDGPU::SRC_SHARED_LIMIT:
+        case AMDGPU::SRC_PRIVATE_BASE:
+        case AMDGPU::SRC_PRIVATE_LIMIT:
+        case AMDGPU::SGPR_NULL:
+        case AMDGPU::MODE:
+          continue;
+
+        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+          llvm_unreachable("src_pops_exiting_wave_id should not be used");
+
+        case AMDGPU::NoRegister:
+          assert(MI.isDebugInstr());
+          continue;
+
+        case AMDGPU::VCC:
+        case AMDGPU::VCC_LO:
+        case AMDGPU::VCC_HI:
+        case AMDGPU::VCC_LO_LO16:
+        case AMDGPU::VCC_LO_HI16:
+        case AMDGPU::VCC_HI_LO16:
+        case AMDGPU::VCC_HI_HI16:
+          Info->setVCCUsage(true);
+          continue;
+
+        case AMDGPU::FLAT_SCR:
+        case AMDGPU::FLAT_SCR_LO:
+        case AMDGPU::FLAT_SCR_HI:
+          continue;
+
+        case AMDGPU::XNACK_MASK:
+        case AMDGPU::XNACK_MASK_LO:
+        case AMDGPU::XNACK_MASK_HI:
+          llvm_unreachable("xnack_mask registers should not be used");
+
+        case AMDGPU::LDS_DIRECT:
+          llvm_unreachable("lds_direct register should not be used");
+
+        case AMDGPU::TBA:
+        case AMDGPU::TBA_LO:
+        case AMDGPU::TBA_HI:
+        case AMDGPU::TMA:
+        case AMDGPU::TMA_LO:
+        case AMDGPU::TMA_HI:
+          llvm_unreachable("trap handler registers should not be used");
+
+        case AMDGPU::SRC_VCCZ:
+          llvm_unreachable("src_vccz register should not be used");
+
+        case AMDGPU::SRC_EXECZ:
+          llvm_unreachable("src_execz register should not be used");
+
+        case AMDGPU::SRC_SCC:
+          llvm_unreachable("src_scc register should not be used");
+
+        default:
+          break;
+        }
+
+        if (AMDGPU::SReg_32RegClass.contains(Reg) ||
+            AMDGPU::SReg_LO16RegClass.contains(Reg) ||
+            AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
+          assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
+                 "trap handler registers should not be used");
+          IsSGPR = true;
+          Width = 1;
+        } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
+                   AMDGPU::VGPR_LO16RegClass.contains(Reg) ||
+                   AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 1;
+        } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
+                   AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 1;
+        } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
+          assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
+                 "trap handler registers should not be used");
+          IsSGPR = true;
+          Width = 2;
+        } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 2;
+        } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 2;
+        } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 3;
+        } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 3;
+        } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 3;
+        } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
+          assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
+                 "trap handler registers should not be used");
+          IsSGPR = true;
+          Width = 4;
+        } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 4;
+        } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 4;
+        } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 5;
+        } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 5;
+        } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 5;
+        } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 6;
+        } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 6;
+        } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 6;
+        } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
+          assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
+                 "trap handler registers should not be used");
+          IsSGPR = true;
+          Width = 8;
+        } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 8;
+        } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 8;
+        } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
+          assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
+                 "trap handler registers should not be used");
+          IsSGPR = true;
+          Width = 16;
+        } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 16;
+        } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 16;
+        } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 32;
+        } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 32;
+        } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 32;
+        } else {
+          llvm_unreachable("Unknown register class");
+        }
+        unsigned HWReg = TRI.getHWRegIndex(Reg);
+        int MaxUsed = HWReg + Width - 1;
+        if (IsSGPR) {
+          MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
+        } else if (IsAGPR) {
+          MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
+        } else {
+          MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
+        }
+      } // end of operand loop
+
+      if (MI.isCall()) {
+        // Pseudo used just to encode the underlying global. Is there a better
+        // way to track this?
+
+        const MachineOperand *CalleeOp =
+            TII->getNamedOperand(MI, AMDGPU::OpName::callee);
+
+        const Function *Callee = getCalleeFunction(*CalleeOp);
+        if (!Callee || Callee->isDeclaration()) {
+          // If this is a call to an external function, we can't do much. Make
+          // conservative guesses.
+          // 48 SGPRs - vcc, - flat_scr, -xnack
+          int MaxSGPRGuess = 47 - IsaInfo::getNumExtraSGPRs(
+                                      &ST, true, ST.hasFlatAddressSpace());
+          MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
+          MaxVGPR = std::max(MaxVGPR, 23);
+          MaxAGPR = std::max(MaxAGPR, 23);
+
+          CalleeFrameSize =
+              std::max(CalleeFrameSize,
+                       static_cast<uint64_t>(AssumedStackSizeForExternalCall));
+
+          Info->setVCCUsage(true);
+          Info->setFlatScratchUsage(ST.hasFlatAddressSpace());
+          Info->setDynamicallySizedStackUsage(true);
+        } else {
+          // We force CodeGen to run in SCC order, so the callee's register
+          // usage etc. should be the cumulative usage of all callees.
+
+          auto I = CallGraphResourceMap.find(Callee);
+          if (I == CallGraphResourceMap.end()) {
+            // Avoid crashing on undefined behavior with an illegal call to a
+            // kernel. If a callsite's calling convention doesn't match the
+            // function's, it's undefined behavior. If the callsite calling
+            // convention does match, that would have errored earlier.
+            // FIXME: The verifier shouldn't allow this.
+            if (AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
+              report_fatal_error("invalid call to entry function");
+
+            llvm_unreachable("callee should have been handled before caller");
+          }
+
+          assert(I->second && "Null resource info");
+          MaxSGPR = std::max(I->second->getNumExplicitSGPR() - 1, MaxSGPR);
+          MaxVGPR = std::max(I->second->getNumVGPR() - 1, MaxVGPR);
+          MaxAGPR = std::max(I->second->getNumAGPR() - 1, MaxAGPR);
+          CalleeFrameSize =
+              std::max(I->second->getPrivateSegmentSize(), CalleeFrameSize);
+          Info->setVCCUsage(Info->usesVCC() || I->second->usesVCC());
+          Info->setFlatScratchUsage(Info->usesFlatScratch() ||
+                                    I->second->usesFlatScratch());
+          Info->setDynamicallySizedStackUsage(
+              Info->hasDynamicallySizedStack() ||
+              I->second->hasDynamicallySizedStack());
+          Info->setRecursionUsage(Info->hasRecursion() ||
+                                  I->second->hasRecursion());
+        }
+
+        // FIXME: Call site could have norecurse on it
+        if (!Callee || !Callee->doesNotRecurse())
+          Info->setRecursionUsage(true);
+      }
+    } // end of instruction loop
+  } // end of BB loop
+
+  Info->setNumExplicitSGPR(MaxSGPR + 1);
+  Info->setNumVGPR(MaxVGPR + 1);
+  Info->setNumAGPR(MaxAGPR + 1);
+  Info->setPrivateSegmentSize(Info->getPrivateSegmentSize() + CalleeFrameSize);
+}
+
+std::unique_ptr<SIFunctionResourceInfo>
+SIFunctionResourceInfoTracker::analyzeResourceUsage(
+    const MachineFunction &MF) {
+  auto Info = std::make_unique<SIFunctionResourceInfo>();
+  Info->setName(MF.getName());
+
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+
+  Info->setFlatScratchUsage(MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
+                            MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI));
+
+  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
+  // instructions aren't used to access the scratch buffer. Inline assembly
+  // may need it though.
+  //
+  // If we only have implicit uses of flat_scr on flat instructions, it is not
+  // really needed.
+  if (Info->usesFlatScratch() && !MFI->hasFlatScratchInit() &&
+      (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
+       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
+       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
+    Info->setFlatScratchUsage(false);
+  }
+
+  Info->setPrivateSegmentSize(FrameInfo.getStackSize());
+
+  // Assume a big number if there are any unknown sized objects.
+  Info->setDynamicallySizedStackUsage(FrameInfo.hasVarSizedObjects());
+  if (Info->hasDynamicallySizedStack())
+    Info->setPrivateSegmentSize(Info->getPrivateSegmentSize() +
+                                AssumedStackSizeForDynamicSizeObjects);
+
+  if (MFI->isStackRealigned())
+    Info->setPrivateSegmentSize(Info->getPrivateSegmentSize() +
+                                FrameInfo.getMaxAlign().value());
+
+  Info->setVCCUsage(MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
+                    MRI.isPhysRegUsed(AMDGPU::VCC_HI));
+
+  // If there are no calls, MachineRegisterInfo can tell us the used register
+  // count easily.
+  // A tail call isn't considered a call for MachineFrameInfo's purposes.
+  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
+    Info->setNumVGPR(getLeafFunctionVGPRsUsed(MF));
+    if (ST.hasMAIInsts())
+      Info->setNumAGPR(getLeafFunctionAGPRsUsed(MF));
+    Info->setNumExplicitSGPR(getLeafFunctionSGPRsUsed(MF));
+    return Info;
+  }
+
+  gatherResourceInfoFromCallGraph(Info, MF);
+  return Info;
+}
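
Finally, a sketch of how a client such as AMDGPUAsmPrinter is expected to drive the tracker, assuming callees are visited before callers (SCC order). The function name is hypothetical and error handling is elided:

#include "SIFunctionResourceInfoTracker.h"
using namespace llvm;

void emitFunctionResourceInfo(SIFunctionResourceInfoTracker &Tracker,
                              const MachineFunction &MF) {
  // Callees were already analyzed, so gatherResourceInfoFromCallGraph can
  // look their records up in CallGraphResourceMap.
  std::unique_ptr<SIFunctionResourceInfo> Info =
      Tracker.analyzeResourceUsage(MF);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  int32_t TotalSGPRs = Info->getTotalNumSGPRs(ST); // explicit + VCC/flat_scr
  int32_t TotalVGPRs = Info->getTotalNumVGPRs(ST); // max(arch VGPRs, AGPRs)
  uint64_t StackBytes = Info->getPrivateSegmentSize();
  (void)TotalSGPRs; (void)TotalVGPRs; (void)StackBytes; // emitted as comments

  // Cache non-entry functions so later callers can fold in this usage.
  if (!MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction())
    Tracker.CallGraphResourceMap[&MF.getFunction()] = std::move(Info);
}
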