diff --git a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl new file mode 100644 --- /dev/null +++ b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl @@ -0,0 +1,18 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null + +// expected-remark@+10 {{Kernel Name: foo}} +// expected-remark@+9 {{SGPRs: 9}} +// expected-remark@+8 {{VGPRs: 10}} +// expected-remark@+7 {{AGPRs: 12}} +// expected-remark@+6 {{ScratchSize [bytes/thread]: 0}} +// expected-remark@+5 {{Occupancy [waves/SIMD]: 10}} +// expected-remark@+4 {{SGPRs Spill: 0}} +// expected-remark@+3 {{VGPRs Spill: 0}} +// expected-remark@+2 {{LDS Size [bytes/block]: 0}} +// expected-remark@+1 {{------------------------------}} +__kernel void foo() { + __asm volatile ("; clobber s8" :::"s8"); + __asm volatile ("; clobber v9" :::"v9"); + __asm volatile ("; clobber a11" :::"a11"); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -69,6 +69,9 @@ uint64_t ScratchSize, uint64_t CodeSize, const AMDGPUMachineFunction* MFI); + void emitResourceUsageRemarks(const MachineFunction &MF, + const SIProgramInfo &CurrentProgramInfo, + bool isModuleEntryFunction, bool hasMAIInsts); uint16_t getAmdhsaKernelCodeProperties( const MachineFunction &MF) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -27,8 +27,10 @@ #include "SIMachineFunctionInfo.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -506,6 +508,9 @@ emitFunctionBody(); + emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(), + STM.hasMAIInsts()); + if (isVerbose()) { MCSectionELF *CommentSection = Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0); @@ -876,7 +881,10 @@ } unsigned LDSSpillSize = - MFI->getLDSWaveSpillSize() * MFI->getMaxFlatWorkGroupSize(); + MFI->getLDSWaveSpillSize() * MFI->getMaxFlatWorkGroupSize(); + + ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs(); + ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs(); ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize; ProgInfo.LDSBlocks = @@ -1167,3 +1175,92 @@ AU.addPreserved(); AsmPrinter::getAnalysisUsage(AU); } + +void AMDGPUAsmPrinter::emitResourceUsageRemarks( + const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo, + bool isModuleEntryFunction, bool hasMAIInsts) { + if (!ORE) + return; + + const char *Name = "kernel-resource-usage"; + + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(Name, "KernelName", + MF.getFunction().getSubprogram(), + &MF.front()) + << "Kernel Name: " + << ore::NV("KernelName", MF.getFunction().getName()); + }); + + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(Name, "NumSGPR", + MF.getFunction().getSubprogram(), + &MF.front()) + << "SGPRs: " << ore::NV("NumSGPR", CurrentProgramInfo.NumSGPR); + }); + + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(Name, "NumVGPR", + MF.getFunction().getSubprogram(), + &MF.front()) + << "VGPRs: " << ore::NV("NumVGPR", CurrentProgramInfo.NumArchVGPR); + }); + + if (hasMAIInsts) { + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(Name, "NumAGPR", + MF.getFunction().getSubprogram(), + &MF.front()) + << "AGPRs: " << ore::NV("NumAGPR", CurrentProgramInfo.NumAccVGPR); + }); + } + + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(Name, "ScratchSize", + MF.getFunction().getSubprogram(), + &MF.front()) + << "ScratchSize [bytes/thread]: " + << ore::NV("ScratchSize", CurrentProgramInfo.ScratchSize); + }); + + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(Name, "Occupancy", + MF.getFunction().getSubprogram(), + &MF.front()) + << "Occupancy [waves/SIMD]: " + << ore::NV("Occupancy", CurrentProgramInfo.Occupancy); + }); + + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(Name, "SGPRSpill", + MF.getFunction().getSubprogram(), + &MF.front()) + << "SGPRs Spill: " + << ore::NV("SGPRSpill", CurrentProgramInfo.SGPRSpill); + }); + + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(Name, "VGPRSpill", + MF.getFunction().getSubprogram(), + &MF.front()) + << "VGPRs Spill: " + << ore::NV("VGPRSpill", CurrentProgramInfo.VGPRSpill); + }); + + if (isModuleEntryFunction) { + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(Name, "BytesLDS", + MF.getFunction().getSubprogram(), + &MF.front()) + << "LDS Size [bytes/block]: " + << ore::NV("BytesLDS", CurrentProgramInfo.LDSSize); + }); + } + + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(Name, "KernelEnd", + MF.getFunction().getSubprogram(), + &MF.front()) + << "------------------------------"; + }); +} diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/llvm/lib/Target/AMDGPU/SIProgramInfo.h --- a/llvm/lib/Target/AMDGPU/SIProgramInfo.h +++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.h @@ -49,6 +49,8 @@ uint32_t AccumOffset = 0; uint32_t TgSplit = 0; uint32_t NumSGPR = 0; + unsigned SGPRSpill = 0; + unsigned VGPRSpill = 0; uint32_t LDSSize = 0; bool FlatUsed = false; diff --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll @@ -0,0 +1,169 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=STDERR %s +; RUN: FileCheck -check-prefix=REMARK %s < %t + +; STDERR: remark: foo.cl:27:0: Kernel Name: test_kernel +; STDERR-NEXT: remark: foo.cl:27:0: SGPRs: 24 +; STDERR-NEXT: remark: foo.cl:27:0: VGPRs: 9 +; STDERR-NEXT: remark: foo.cl:27:0: AGPRs: 43 +; STDERR-NEXT: remark: foo.cl:27:0: ScratchSize [bytes/thread]: 0 +; STDERR-NEXT: remark: foo.cl:27:0: Occupancy [waves/SIMD]: 5 +; STDERR-NEXT: remark: foo.cl:27:0: SGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:27:0: VGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:27:0: LDS Size [bytes/block]: 512 +; STDERR-NEXT: remark: foo.cl:27:0: ------------------------------ + +; REMARK-LABEL: --- !Analysis +; REMARK: Pass: kernel-resource-usage +; REMARK-NEXT: Name: KernelName +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - String: 'Kernel Name: ' +; REMARK-NEXT: - KernelName: test_kernel +; REMARK-NEXT: ... +; REMARK-NEXT: --- !Analysis +; REMARK-NEXT: Pass: kernel-resource-usage +; REMARK-NEXT: Name: NumSGPR +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - String: 'SGPRs: ' +; REMARK-NEXT: - NumSGPR: '24' +; REMARK-NEXT: ... +; REMARK-NEXT: --- !Analysis +; REMARK-NEXT: Pass: kernel-resource-usage +; REMARK-NEXT: Name: NumVGPR +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - String: 'VGPRs: ' +; REMARK-NEXT: - NumVGPR: '9' +; REMARK-NEXT: ... +; REMARK-NEXT: --- !Analysis +; REMARK-NEXT: Pass: kernel-resource-usage +; REMARK-NEXT: Name: NumAGPR +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - String: 'AGPRs: ' +; REMARK-NEXT: - NumAGPR: '43' +; REMARK-NEXT: ... +; REMARK-NEXT: --- !Analysis +; REMARK-NEXT: Pass: kernel-resource-usage +; REMARK-NEXT: Name: ScratchSize +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - String: 'ScratchSize [bytes/thread]: ' +; REMARK-NEXT: - ScratchSize: '0' +; REMARK-NEXT: ... +; REMARK-NEXT: --- !Analysis +; REMARK-NEXT: Pass: kernel-resource-usage +; REMARK-NEXT: Name: Occupancy +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - String: 'Occupancy [waves/SIMD]: ' +; REMARK-NEXT: - Occupancy: '5' +; REMARK-NEXT: ... +; REMARK-NEXT: --- !Analysis +; REMARK-NEXT: Pass: kernel-resource-usage +; REMARK-NEXT: Name: SGPRSpill +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - String: 'SGPRs Spill: ' +; REMARK-NEXT: - SGPRSpill: '0' +; REMARK-NEXT: ... +; REMARK-NEXT: --- !Analysis +; REMARK-NEXT: Pass: kernel-resource-usage +; REMARK-NEXT: Name: VGPRSpill +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - String: 'VGPRs Spill: ' +; REMARK-NEXT: - VGPRSpill: '0' +; REMARK-NEXT: ... +; REMARK-NEXT: --- !Analysis +; REMARK-NEXT: Pass: kernel-resource-usage +; REMARK-NEXT: Name: BytesLDS +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - String: 'LDS Size [bytes/block]: ' +; REMARK-NEXT: - BytesLDS: '512' +; REMARK-NEXT: ... +; REMARK-NEXT: --- !Analysis +; REMARK-NEXT: Pass: kernel-resource-usage +; REMARK-NEXT: Name: KernelEnd +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - String: '------------------------------' +; REMARK-NEXT: ... +@lds = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4 + +define amdgpu_kernel void @test_kernel() !dbg !3 { + call void asm sideeffect "; clobber v8", "~{v8}"() + call void asm sideeffect "; clobber s23", "~{s23}"() + call void asm sideeffect "; clobber a42", "~{a42}"() + call void asm sideeffect "; use $0", "v"([128 x i32] addrspace(3)* @lds) + ret void +} + +; STDERR: remark: foo.cl:42:0: Kernel Name: test_func +; STDERR-NEXT: remark: foo.cl:42:0: SGPRs: 0 +; STDERR-NEXT: remark: foo.cl:42:0: VGPRs: 0 +; STDERR-NEXT: remark: foo.cl:42:0: AGPRs: 0 +; STDERR-NEXT: remark: foo.cl:42:0: ScratchSize [bytes/thread]: 0 +; STDERR-NEXT: remark: foo.cl:42:0: Occupancy [waves/SIMD]: 0 +; STDERR-NEXT: remark: foo.cl:42:0: SGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:42:0: VGPRs Spill: 0 +; STDERR-NOT: LDS Size +; STDERR-NEXT: remark: foo.cl:42:0: ------------------------------ +define void @test_func() !dbg !6 { + call void asm sideeffect "; clobber v17", "~{v17}"() + call void asm sideeffect "; clobber s11", "~{s11}"() + call void asm sideeffect "; clobber a9", "~{a9}"() + ret void +} + +; STDERR: remark: foo.cl:8:0: Kernel Name: empty_kernel +; STDERR-NEXT: remark: foo.cl:8:0: SGPRs: 0 +; STDERR-NEXT: remark: foo.cl:8:0: VGPRs: 0 +; STDERR-NEXT: remark: foo.cl:8:0: AGPRs: 0 +; STDERR-NEXT: remark: foo.cl:8:0: ScratchSize [bytes/thread]: 0 +; STDERR-NEXT: remark: foo.cl:8:0: Occupancy [waves/SIMD]: 10 +; STDERR-NEXT: remark: foo.cl:8:0: SGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:8:0: VGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:8:0: LDS Size [bytes/block]: 0 +; STDERR-NEXT: remark: foo.cl:8:0: ------------------------------ +define amdgpu_kernel void @empty_kernel() !dbg !7 { + ret void +} + +; STDERR: remark: foo.cl:52:0: Kernel Name: empty_func +; STDERR-NEXT: remark: foo.cl:52:0: SGPRs: 0 +; STDERR-NEXT: remark: foo.cl:52:0: VGPRs: 0 +; STDERR-NEXT: remark: foo.cl:52:0: AGPRs: 0 +; STDERR-NEXT: remark: foo.cl:52:0: ScratchSize [bytes/thread]: 0 +; STDERR-NEXT: remark: foo.cl:52:0: Occupancy [waves/SIMD]: 0 +; STDERR-NEXT: remark: foo.cl:52:0: SGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:52:0: VGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:52:0: ------------------------------ +define void @empty_func() !dbg !8 { + ret void +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "foo.cl", directory: "/tmp") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "test_kernel", scope: !1, file: !1, type: !4, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!4 = !DISubroutineType(types: !5) +!5 = !{null} +!6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)