Index: clang/test/Frontend/amdgcn-machine-analysis-remarks.cl =================================================================== --- /dev/null +++ clang/test/Frontend/amdgcn-machine-analysis-remarks.cl @@ -0,0 +1,14 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=asm-printer -S -O0 -fno-experimental-new-pass-manager -verify %s + +// expected-remark@+7 {{BasicBlock: entry: 1}} +// expected-remark@+5 {{4 instructions in function}} +// expected-remark@+4 {{13 SGPRs used in function}} +// expected-remark@+3 {{10 VGPRs used in function}} +// expected-remark@+2 {{12 AGPRs used in function}} +// expected-remark@+1 {{0 bytes LDS used in function}} +__kernel void foo() { + __asm volatile ("; clobber s8" :::"s8"); + __asm volatile ("; clobber v9" :::"v9"); + __asm volatile ("; clobber a11" :::"a11"); +} Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -26,6 +26,7 @@ #include "SIMachineFunctionInfo.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -39,6 +40,8 @@ using namespace llvm; using namespace llvm::AMDGPU; +#define DEBUG_TYPE "asm-printer" + // We need to tell the runtime some amount ahead of time if we don't know the // true stack size. Assume a smaller number if this is only due to dynamic / // non-entry block allocas. @@ -473,6 +476,42 @@ emitFunctionBody(); + ORE->emit( + [&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "NumSGPR", MF.getFunction().getSubprogram(), &MF.front()) + << ore::NV("NumSGPR", CurrentProgramInfo.NumSGPR) + << " SGPRs used in function"; + }); + + ORE->emit( + [&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "NumVGPR", MF.getFunction().getSubprogram(), &MF.front()) + << ore::NV("NumVGPR", CurrentProgramInfo.NumArchVGPR) + << " VGPRs used in function"; + }); + + if (STM.hasMAIInsts()) { + ORE->emit( + [&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "NumAGPR", MF.getFunction().getSubprogram(), &MF.front()) + << ore::NV("NumAGPR", CurrentProgramInfo.NumAccVGPR) + << " AGPRs used in function"; + }); + } + + if (MFI->isModuleEntryFunction()) { + ORE->emit( + [&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "BytesLDS", MF.getFunction().getSubprogram(), &MF.front()) + << ore::NV("BytesLDS", CurrentProgramInfo.LDSSize) + << " bytes LDS used in function"; + }); + } + if (isVerbose()) { MCSectionELF *CommentSection = Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0); Index: llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll @@ -0,0 +1,121 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=asm-printer -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=STDERR %s +; RUN: FileCheck -check-prefix=REMARK %s < %t + +; STDERR: remark: foo.cl:27:0: 6 instructions in function +; STDERR-NEXT: remark: foo.cl:27:0: 28 SGPRs used in function +; STDERR-NEXT: remark: foo.cl:27:0: 9 VGPRs used in function +; STDERR-NEXT: remark: foo.cl:27:0: 43 AGPRs used in function +; STDERR-NEXT: remark: foo.cl:27:0: 512 bytes LDS used in function +; STDERR-NEXT: remark: :0:0: BasicBlock: +; STDERR-NEXT: : 2 + +; REMARK-LABEL: --- !Analysis +; REMARK-NEXT: Pass: prologepilog +; REMARK-NEXT: Name: StackSize +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - NumStackBytes: '0' +; REMARK-NEXT: - String: ' stack bytes in function' +; REMARK-NEXT: ... + +; REMARK-LABEL: --- !Analysis +; REMARK-NEXT: Pass: asm-printer +; REMARK-NEXT: Name: InstructionMix +; REMARK-NEXT: Function: test_kernel + +; REMARK-LABEL: --- !Analysis +; REMARK-NEXT: Pass: asm-printer +; REMARK-NEXT: Name: InstructionCount +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel + +; REMARK-LABEL: --- !Analysis +; REMARK-NEXT: Pass: asm-printer +; REMARK-NEXT: Name: NumSGPR{{$}} +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - NumSGPR: '28' +; REMARK-NEXT: - String: ' SGPRs used in function' +; REMARK-NEXT: ... +; REMARK-NEXT: --- !Analysis +; REMARK-NEXT: Pass: asm-printer +; REMARK-NEXT: Name: NumVGPR{{$}} +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - NumVGPR: '9' +; REMARK-NEXT: - String: ' VGPRs used in function' +; REMARK-NEXT: ... +; REMARK-NEXT: --- !Analysis +; REMARK-NEXT: Pass: asm-printer +; REMARK-NEXT: Name: NumAGPR +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - NumAGPR: '43' +; REMARK-NEXT: - String: ' AGPRs used in function' +; REMARK-NEXT: ... +; REMARK-NEXT: --- !Analysis +; REMARK-NEXT: Pass: asm-printer +; REMARK-NEXT: Name: BytesLDS +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - BytesLDS: '512' +; REMARK-NEXT: - String: ' bytes LDS used in function' +; REMARK-NEXT: ... +@lds = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4 + +define amdgpu_kernel void @test_kernel() !dbg !3 { + call void asm sideeffect "; clobber v8", "~{v8}"() + call void asm sideeffect "; clobber s23", "~{s23}"() + call void asm sideeffect "; clobber a42", "~{a42}"() + call void asm sideeffect "; use $0", "v"([128 x i32] addrspace(3)* @lds) + ret void +} + +; STDERR: remark: foo.cl:42:0: 5 instructions in function +; STDERR-NEXT: remark: foo.cl:42:0: 0 SGPRs used in function +; STDERR-NEXT: remark: foo.cl:42:0: 0 VGPRs used in function +; STDERR-NEXT: remark: foo.cl:42:0: 0 AGPRs used in function +; STDERR-NOT: LDS used in function +define void @test_func() !dbg !6 { + call void asm sideeffect "; clobber v17", "~{v17}"() + call void asm sideeffect "; clobber s11", "~{s11}"() + call void asm sideeffect "; clobber a9", "~{a9}"() + ret void +} + +; STDERR: remark: foo.cl:8:0: 1 instructions in function +; STDERR-NEXT: remark: foo.cl:8:0: 4 SGPRs used in function +; STDERR-NEXT: remark: foo.cl:8:0: 0 VGPRs used in function +; STDERR-NEXT: remark: foo.cl:8:0: 0 AGPRs used in function +; STDERR-NEXT: remark: foo.cl:8:0: 0 bytes LDS used in function +; STDERR-NEXT: remark: :0:0: BasicBlock: +; STDERR-NEXT: : 2 +define amdgpu_kernel void @empty_kernel() !dbg !7 { + ret void +} + +; STDERR: remark: foo.cl:52:0: 2 instructions in function +; STDERR-NEXT: remark: foo.cl:52:0: 0 SGPRs used in function +; STDERR-NEXT: remark: foo.cl:52:0: 0 VGPRs used in function +; STDERR-NEXT: remark: foo.cl:52:0: 0 AGPRs used in function +define void @empty_func() !dbg !8 { + ret void +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "foo.cl", directory: "/tmp") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "test_kernel", scope: !1, file: !1, type: !4, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!4 = !DISubroutineType(types: !5) +!5 = !{null} +!6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)