diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -43,20 +43,6 @@
 char llvm::AMDGPUResourceUsageAnalysis::ID = 0;
 char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;
 
-// We need to tell the runtime some amount ahead of time if we don't know the
-// true stack size. Assume a smaller number if this is only due to dynamic /
-// non-entry block allocas.
-static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
-    "amdgpu-assume-external-call-stack-size",
-    cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
-    cl::init(16384));
-
-static cl::opt<uint32_t> AssumedStackSizeForDynamicSizeObjects(
-    "amdgpu-assume-dynamic-stack-object-size",
-    cl::desc("Assumed extra stack use if there are any "
-             "variable sized objects (in bytes)"),
-    cl::Hidden, cl::init(4096));
-
 INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE,
                 "Function register usage analysis", true, true)
 
@@ -165,8 +151,6 @@
 
-  // Assume a big number if there are any unknown sized objects.
+  // Record whether there are any unknown sized objects.
   Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
-  if (Info.HasDynamicallySizedStack)
-    Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
 
   if (MFI->isStackRealigned())
     Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
@@ -459,27 +443,9 @@
         // FIXME: Call site could have norecurse on it
         if (!Callee || !Callee->doesNotRecurse()) {
           Info.HasRecursion = true;
-
-          // TODO: If we happen to know there is no stack usage in the
-          // callgraph, we don't need to assume an infinitely growing stack.
-          if (!MI.isReturn()) {
-            // We don't need to assume an unknown stack size for tail calls.
-
-            // FIXME: This only benefits in the case where the kernel does not
-            // directly call the tail called function. If a kernel directly
-            // calls a tail recursive function, we'll assume maximum stack size
-            // based on the regular call instruction.
-            CalleeFrameSize =
-              std::max(CalleeFrameSize,
-                       static_cast<uint64_t>(AssumedStackSizeForExternalCall));
-          }
         }
 
         if (IsIndirect || I == CallGraphResourceInfo.end()) {
-          CalleeFrameSize =
-              std::max(CalleeFrameSize,
-                       static_cast<uint64_t>(AssumedStackSizeForExternalCall));
-
           // Register usage of indirect calls gets handled later
           Info.UsesVCC = true;
           Info.UsesFlatScratch = ST.hasFlatAddressSpace();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,DEFAULTSIZE %s
-; RUN: llc -global-isel -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 < %s | FileCheck -check-prefixes=GCN,ASSUME1024 %s
 
 ; FIXME: Generated test checks do not check metadata at the end of the
 ; function, so this also includes manually added checks.
@@ -76,11 +75,8 @@
   store volatile i32 0, i32 addrspace(1)* undef
   ret void
 }
-; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4112
-; DEFAULTSIZE: ; ScratchSize: 4112
-
-; ASSUME1024: .amdhsa_private_segment_fixed_size 1040
-; ASSUME1024: ; ScratchSize: 1040
+; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 16
+; DEFAULTSIZE: ; ScratchSize: 16
 
 define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align64(i32 addrspace(1)* %out, i32 %arg.cond, i32 %in) {
 ; GCN-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64:
@@ -139,11 +135,8 @@
   ret void
 }
 
-; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4160
-; DEFAULTSIZE: ; ScratchSize: 4160
-
-; ASSUME1024: .amdhsa_private_segment_fixed_size 1088
-; ASSUME1024: ; ScratchSize: 1088
+; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 64
+; DEFAULTSIZE: ; ScratchSize: 64
 
 
 define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) {
diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
--- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -181,7 +181,7 @@
 ; GCN: is_dynamic_callstack = 1
 ; NumSgprs: 48
 ; NumVgprs: 24
-; GCN: ScratchSize: 16384
+; GCN: ScratchSize: 0
 define amdgpu_kernel void @usage_external() #0 {
   call void @external()
   ret void
@@ -193,14 +193,14 @@
 ; GCN: is_dynamic_callstack = 1
 ; NumSgprs: 48
 ; NumVgprs: 24
-; GCN: ScratchSize: 16384
+; GCN: ScratchSize: 0
 define amdgpu_kernel void @usage_external_recurse() #0 {
   call void @external_recurse()
   ret void
 }
 
 ; GCN-LABEL: {{^}}direct_recursion_use_stack:
-; GCN: ScratchSize: 18448{{$}}
+; GCN: ScratchSize: 2064{{$}}
 define void @direct_recursion_use_stack(i32 %val) #2 {
   %alloca = alloca [512 x i32], align 4, addrspace(5)
   call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
@@ -219,7 +219,7 @@
 ; GCN-LABEL: {{^}}usage_direct_recursion:
 ; GCN: is_ptr64 = 1
 ; GCN: is_dynamic_callstack = 1
-; GCN: workitem_private_segment_byte_size = 18448{{$}}
+; GCN: workitem_private_segment_byte_size = 2064{{$}}
 define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
   call void @direct_recursion_use_stack(i32 %n)
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll
@@ -1,10 +1,10 @@
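-; Note: uses a randomly selected assumed external call stack size so that the
-; test assertions are unlikely to succeed by accident.
+; Note: no stack size is assumed for external calls any more, so the private
+; segment sizes checked below only cover stack usage that is actually known.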
 
-; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX7 %s
-; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx803 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX8 %s
-; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx900 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX9 %s
-; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx1010 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX10 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX7 %s
+; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx803 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX8 %s
+; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx900 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX9 %s
+; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx1010 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX10 %s
 
 ; CHECK-LABEL: amdhsa.kernels
 
@@ -78,7 +78,7 @@
 
 ; test a kernel with an external call that occurs before its callee in the module
 ; CHECK-LABEL: test3
-; CHECK:     .private_segment_fixed_size: 5310
+; CHECK:     .private_segment_fixed_size: 0
 
 ; GFX7:     .sgpr_count:     37
 ; GFX7:     .sgpr_spill_count: 0
@@ -108,7 +108,7 @@
 
 ; test a kernel without an external call that occurs after its callee in the module
 ; CHECK-LABEL: test4
-; CHECK:     .private_segment_fixed_size: 5310
+; CHECK:     .private_segment_fixed_size: 0
 
 ; GFX7:     .sgpr_count:     37
 ; GFX7:     .sgpr_spill_count: 0
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
--- a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
@@ -66,7 +66,7 @@
 ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 ; CHECK-NEXT: .amdhsa_uses_dynamic_stack 1
-; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
 ; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 ; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
 ; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -27,7 +27,7 @@
 ; GCN-NEXT:     enable_wgp_mode = 0
 ; GCN-NEXT:     enable_mem_ordered = 0
 ; GCN-NEXT:     enable_fwd_progress = 0
-; GCN-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
+; GCN-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
 ; GCN-NEXT:     user_sgpr_count = 14
 ; GCN-NEXT:     enable_trap_handler = 0
 ; GCN-NEXT:     enable_sgpr_workgroup_id_x = 1
@@ -55,7 +55,7 @@
 ; GCN-NEXT:     is_dynamic_callstack = 1
 ; GCN-NEXT:     is_debug_enabled = 0
 ; GCN-NEXT:     is_xnack_enabled = 0
-; GCN-NEXT:     workitem_private_segment_byte_size = 16384
+; GCN-NEXT:     workitem_private_segment_byte_size = 0
 ; GCN-NEXT:     workgroup_group_segment_byte_size = 0
 ; GCN-NEXT:     gds_segment_byte_size = 0
 ; GCN-NEXT:     kernarg_segment_byte_size = 64
@@ -120,7 +120,7 @@
 ; GISEL-NEXT:     enable_wgp_mode = 0
 ; GISEL-NEXT:     enable_mem_ordered = 0
 ; GISEL-NEXT:     enable_fwd_progress = 0
-; GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
+; GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
 ; GISEL-NEXT:     user_sgpr_count = 14
 ; GISEL-NEXT:     enable_trap_handler = 0
 ; GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
@@ -148,7 +148,7 @@
 ; GISEL-NEXT:     is_dynamic_callstack = 1
 ; GISEL-NEXT:     is_debug_enabled = 0
 ; GISEL-NEXT:     is_xnack_enabled = 0
-; GISEL-NEXT:     workitem_private_segment_byte_size = 16384
+; GISEL-NEXT:     workitem_private_segment_byte_size = 0
 ; GISEL-NEXT:     workgroup_group_segment_byte_size = 0
 ; GISEL-NEXT:     gds_segment_byte_size = 0
 ; GISEL-NEXT:     kernarg_segment_byte_size = 64
@@ -218,7 +218,7 @@
 ; GCN-NEXT:     enable_wgp_mode = 0
 ; GCN-NEXT:     enable_mem_ordered = 0
 ; GCN-NEXT:     enable_fwd_progress = 0
-; GCN-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
+; GCN-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
 ; GCN-NEXT:     user_sgpr_count = 14
 ; GCN-NEXT:     enable_trap_handler = 0
 ; GCN-NEXT:     enable_sgpr_workgroup_id_x = 1
@@ -246,7 +246,7 @@
 ; GCN-NEXT:     is_dynamic_callstack = 1
 ; GCN-NEXT:     is_debug_enabled = 0
 ; GCN-NEXT:     is_xnack_enabled = 0
-; GCN-NEXT:     workitem_private_segment_byte_size = 16384
+; GCN-NEXT:     workitem_private_segment_byte_size = 0
 ; GCN-NEXT:     workgroup_group_segment_byte_size = 0
 ; GCN-NEXT:     gds_segment_byte_size = 0
 ; GCN-NEXT:     kernarg_segment_byte_size = 64
@@ -312,7 +312,7 @@
 ; GISEL-NEXT:     enable_wgp_mode = 0
 ; GISEL-NEXT:     enable_mem_ordered = 0
 ; GISEL-NEXT:     enable_fwd_progress = 0
-; GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
+; GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
 ; GISEL-NEXT:     user_sgpr_count = 14
 ; GISEL-NEXT:     enable_trap_handler = 0
 ; GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
@@ -340,7 +340,7 @@
 ; GISEL-NEXT:     is_dynamic_callstack = 1
 ; GISEL-NEXT:     is_debug_enabled = 0
 ; GISEL-NEXT:     is_xnack_enabled = 0
-; GISEL-NEXT:     workitem_private_segment_byte_size = 16384
+; GISEL-NEXT:     workitem_private_segment_byte_size = 0
 ; GISEL-NEXT:     workgroup_group_segment_byte_size = 0
 ; GISEL-NEXT:     gds_segment_byte_size = 0
 ; GISEL-NEXT:     kernarg_segment_byte_size = 64
diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
--- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=DEFAULTSIZE,MUBUF %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 < %s | FileCheck -check-prefixes=ASSUME1024,MUBUF %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=DEFAULTSIZE,FLATSCR %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch -amdgpu-assume-dynamic-stack-object-size=1024 < %s | FileCheck -check-prefixes=ASSUME1024,FLATSCR %s
 
 ; FIXME: Generated test checks do not check metadata at the end of the
 ; function, so this also includes manually added checks.
@@ -108,11 +106,8 @@
   store volatile i32 0, i32 addrspace(1)* undef
   ret void
 }
-; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4112
-; DEFAULTSIZE: ; ScratchSize: 4112
-
-; ASSUME1024: .amdhsa_private_segment_fixed_size 1040
-; ASSUME1024: ; ScratchSize: 1040
+; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 16
+; DEFAULTSIZE: ; ScratchSize: 16
 
 define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align64(i32 addrspace(1)* %out, i32 %arg.cond, i32 %in) {
 ; MUBUF-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64:
@@ -201,11 +196,8 @@
   ret void
 }
 
-; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4160
-; DEFAULTSIZE: ; ScratchSize: 4160
-
-; ASSUME1024: .amdhsa_private_segment_fixed_size 1088
-; ASSUME1024: ; ScratchSize: 1088
+; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 64
+; DEFAULTSIZE: ; ScratchSize: 64
 
 
 define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) {
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
@@ -80,7 +80,7 @@
 ; ASM: buffer_store_dword
 ; ASM: buffer_store_dword
 ; ASM: s_swappc_b64
-; ASM: ScratchSize: 16400
+; ASM: ScratchSize: 16
 define amdgpu_kernel void @call_private(i32 addrspace(1)* %out, i32 %in) #0 {
 entry:
   %tmp = alloca [2 x i32], addrspace(5)
diff --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll
--- a/llvm/test/CodeGen/AMDGPU/recursion.ll
+++ b/llvm/test/CodeGen/AMDGPU/recursion.ll
@@ -28,9 +28,9 @@
   ret void
 }
 
-; For an arbitrary recursive call, report a large number for unknown stack usage.
+; For an arbitrary recursive call, report only the minimum stack requirement.
 ; CHECK-LABEL: {{^}}calls_recursive:
-; CHECK: .amdhsa_private_segment_fixed_size 16400{{$}}
+; CHECK: .amdhsa_private_segment_fixed_size 16{{$}}
 define amdgpu_kernel void @calls_recursive() {
   call void @recursive()
   ret void
@@ -50,14 +50,14 @@
 ; in the kernel.
 
 ; CHECK-LABEL: {{^}}kernel_calls_tail_recursive:
-; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}}
+; CHECK: .amdhsa_private_segment_fixed_size 0{{$}}
 define amdgpu_kernel void @kernel_calls_tail_recursive() {
   call void @tail_recursive()
   ret void
 }
 
 ; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
-; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}}
+; CHECK: .amdhsa_private_segment_fixed_size 8{{$}}
 define amdgpu_kernel void @kernel_calls_tail_recursive_with_stack() {
   call void @tail_recursive_with_stack()
   ret void