Index: llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -452,6 +452,25 @@
         if (!IsIndirect)
           I = CallGraphResourceInfo.find(Callee);
 
+        // FIXME: Call site could have norecurse on it
+        if (!Callee || !Callee->doesNotRecurse()) {
+          Info.HasRecursion = true;
+
+          // TODO: If we happen to know there is no stack usage in the
+          // callgraph, we don't need to assume an infinitely growing stack.
+          if (!MI.isReturn()) {
+            // We don't need to assume an unknown stack size for tail calls.
+
+            // FIXME: This only benefits in the case where the kernel does not
+            // directly call the tail called function. If a kernel directly
+            // calls a tail recursive function, we'll assume maximum stack size
+            // based on the regular call instruction.
+            CalleeFrameSize =
+              std::max(CalleeFrameSize,
+                       static_cast<uint64_t>(AssumedStackSizeForExternalCall));
+          }
+        }
+
         if (IsIndirect || I == CallGraphResourceInfo.end()) {
           CalleeFrameSize =
             std::max(CalleeFrameSize,
@@ -476,10 +495,6 @@
           Info.HasRecursion |= I->second.HasRecursion;
           Info.HasIndirectCall |= I->second.HasIndirectCall;
         }
-
-        // FIXME: Call site could have norecurse on it
-        if (!Callee || !Callee->doesNotRecurse())
-          Info.HasRecursion = true;
       }
     }
   }
Index: llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -199,7 +199,7 @@
 }
 
 ; GCN-LABEL: {{^}}direct_recursion_use_stack:
-; GCN: ScratchSize: 2064
+; GCN: ScratchSize: 18448{{$}}
 define void @direct_recursion_use_stack(i32 %val) #2 {
   %alloca = alloca [512 x i32], align 4, addrspace(5)
   call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
@@ -218,7 +218,7 @@
 ; GCN-LABEL: {{^}}usage_direct_recursion:
 ; GCN: is_ptr64 = 1
 ; GCN: is_dynamic_callstack = 1
-; GCN: workitem_private_segment_byte_size = 2064
+; GCN: workitem_private_segment_byte_size = 18448{{$}}
 define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
   call void @direct_recursion_use_stack(i32 %n)
   ret void
Index: llvm/test/CodeGen/AMDGPU/recursion.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/recursion.ll
@@ -0,0 +1,64 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}recursive:
+; CHECK: ScratchSize: 16
+define void @recursive() {
+  call void @recursive()
+  store volatile i32 0, i32 addrspace(1)* undef
+  ret void
+}
+
+; CHECK-LABEL: {{^}}tail_recursive:
+; CHECK: ScratchSize: 0
+define void @tail_recursive() {
+  tail call void @tail_recursive()
+  ret void
+}
+
+define void @calls_tail_recursive() norecurse {
+  tail call void @tail_recursive()
+  ret void
+}
+
+; CHECK-LABEL: {{^}}tail_recursive_with_stack:
+define void @tail_recursive_with_stack() {
+  %alloca = alloca i32, addrspace(5)
+  store volatile i32 0, i32 addrspace(5)* %alloca
+  tail call void @tail_recursive_with_stack()
+  ret void
+}
+
+; For an arbitrary recursive call, report a large number for unknown stack usage.
+; CHECK-LABEL: {{^}}calls_recursive:
+; CHECK: .amdhsa_private_segment_fixed_size 16400{{$}}
+define amdgpu_kernel void @calls_recursive() {
+  call void @recursive()
+  ret void
+}
+
+; Make sure we do not report a huge stack size for tail recursive
+; functions
+; CHECK-LABEL: {{^}}kernel_indirectly_calls_tail_recursive:
+; CHECK: .amdhsa_private_segment_fixed_size 0{{$}}
+define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
+  call void @calls_tail_recursive()
+  ret void
+}
+
+; TODO: Even though tail_recursive is only called as a tail call, we
+; end up treating it as generally recursive call from the regular call
+; in the kernel.
+
+; CHECK-LABEL: {{^}}kernel_calls_tail_recursive:
+; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}}
+define amdgpu_kernel void @kernel_calls_tail_recursive() {
+  call void @tail_recursive()
+  ret void
+}
+
+; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
+; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}}
+define amdgpu_kernel void @kernel_calls_tail_recursive_with_stack() {
+  call void @tail_recursive_with_stack()
+  ret void
+}