diff --git a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl --- a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl +++ b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl @@ -1,15 +1,16 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null -// expected-remark@+9 {{Function Name: foo}} -// expected-remark@+8 {{ SGPRs: 13}} -// expected-remark@+7 {{ VGPRs: 10}} -// expected-remark@+6 {{ AGPRs: 12}} -// expected-remark@+5 {{ ScratchSize [bytes/lane]: 0}} -// expected-remark@+4 {{ Occupancy [waves/SIMD]: 10}} -// expected-remark@+3 {{ SGPRs Spill: 0}} -// expected-remark@+2 {{ VGPRs Spill: 0}} -// expected-remark@+1 {{ LDS Size [bytes/block]: 0}} +// expected-remark@+10 {{Function Name: foo}} +// expected-remark@+9 {{ SGPRs: 13}} +// expected-remark@+8 {{ VGPRs: 10}} +// expected-remark@+7 {{ AGPRs: 12}} +// expected-remark@+6 {{ ScratchSize [bytes/lane]: 0}} +// expected-remark@+5 {{ Occupancy [waves/SIMD]: 10}} +// expected-remark@+4 {{ SGPRs Spill: 0}} +// expected-remark@+3 {{ VGPRs Spill: 0}} +// expected-remark@+2 {{ LDS Size [bytes/block]: 0}} +// expected-remark@+1 {{ Uses Dynamic Stack: False}} __kernel void foo() { __asm volatile ("; clobber s8" :::"s8"); __asm volatile ("; clobber v9" :::"v9"); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1302,4 +1302,8 @@ if (isModuleEntryFunction) EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]", CurrentProgramInfo.LDSSize); + std::string UsesDynamicStackStr = + CurrentProgramInfo.DynamicCallStack ? "True" : "False"; + EmitResourceUsageRemark("UsesDynamicStack", "Uses Dynamic Stack", + UsesDynamicStackStr); } diff --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll --- a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll +++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=STDERR %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefix=STDERR %s ; RUN: FileCheck -check-prefix=REMARK %s < %t ; STDERR: remark: foo.cl:27:0: Function Name: test_kernel @@ -10,6 +10,7 @@ ; STDERR-NEXT: remark: foo.cl:27:0: SGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:27:0: VGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:27:0: LDS Size [bytes/block]: 512 +; STDERR-NEXT: remark: foo.cl:27:0: Uses Dynamic Stack: False ; REMARK-LABEL: --- !Analysis ; REMARK: Pass: kernel-resource-usage @@ -111,6 +112,7 @@ ; STDERR-NEXT: remark: foo.cl:42:0: Occupancy [waves/SIMD]: 0 ; STDERR-NEXT: remark: foo.cl:42:0: SGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:42:0: VGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:42:0: Uses Dynamic Stack: False ; STDERR-NOT: LDS Size define void @test_func() !dbg !6 { call void asm sideeffect "; clobber v17", "~{v17}"() @@ -128,6 +130,7 @@ ; STDERR-NEXT: remark: foo.cl:8:0: SGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:8:0: VGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:8:0: LDS Size [bytes/block]: 0 +; STDERR-NEXT: remark: foo.cl:8:0: Uses Dynamic Stack: False define amdgpu_kernel void @empty_kernel() !dbg !7 { ret void } @@ -140,12 +143,33 @@ ; STDERR-NEXT: remark: foo.cl:52:0: Occupancy [waves/SIMD]: 0 ; STDERR-NEXT: remark: foo.cl:52:0: SGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:52:0: VGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:52:0: Uses Dynamic Stack: False define void @empty_func() !dbg !8 { ret void } +; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call +; STDERR-NEXT: remark: foo.cl:64:0: SGPRs: 39 +; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: 32 +; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: 10 +; STDERR-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0 +; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: 8 +; STDERR-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0 +; STDERR-NEXT: remark: foo.cl:64:0: Uses Dynamic Stack: True +@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 + +define amdgpu_kernel void @test_indirect_call() !dbg !9 { + %fptr = load ptr, ptr addrspace(4) @gv.fptr0 + call void %fptr() + ret void +} + + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2} +!llvm.module.flags = !{!10} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) !1 = !DIFile(filename: "foo.cl", directory: "/tmp") @@ -156,3 +180,5 @@ !6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) !7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) !8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!9 = distinct !DISubprogram(name: "test_indirect_call", scope: !1, file: !1, type: !4, scopeLine: 64, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!10 = !{i32 1, !"amdgpu_code_object_version", i32 500}