AMDGPU: do not emit amd_kernel_code_t for non-entry functions.

EmitFunctionBodyStart() now returns immediately when the current function is
not an entry function, before its KernelInfo/KernelCode locals are declared.
In the comment section, non-entry functions are labelled " Function info:"
instead of " Kernel info:", and the comments from " SGPRBlocks:" onward are
skipped for them. The hsa-func.ll checks are updated to expect no
amd_kernel_code_t block and no COMPUTE_PGM_RSRC2 comment.

Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -144,6 +144,10 @@
 }
 
 void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
+  const AMDGPUMachineFunction *MFI = MF->getInfo<AMDGPUMachineFunction>();
+  if (!MFI->isEntryFunction())
+    return;
+
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
   SIProgramInfo KernelInfo;
   amd_kernel_code_t KernelCode;
@@ -222,13 +226,19 @@
     OutStreamer->SwitchSection(CommentSection);
 
     if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-      OutStreamer->emitRawComment(" Kernel info:", false);
+      if (MFI->isEntryFunction()) {
+        OutStreamer->emitRawComment(" Kernel info:", false);
+      } else {
+        OutStreamer->emitRawComment(" Function info:", false);
+      }
+
       OutStreamer->emitRawComment(" codeLenInByte = " +
                                   Twine(getFunctionCodeSize(MF)), false);
       OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
                                   false);
       OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
                                   false);
+
       OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
                                   false);
       OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
@@ -238,6 +248,9 @@
       OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
                                   " bytes/workgroup (compile time only)", false);
 
+      if (!MFI->isEntryFunction())
+        return false;
+
       OutStreamer->emitRawComment(" SGPRBlocks: " +
                                   Twine(KernelInfo.SGPRBlocks), false);
       OutStreamer->emitRawComment(" VGPRBlocks: " +
Index: test/CodeGen/AMDGPU/hsa-func.ll
===================================================================
--- test/CodeGen/AMDGPU/hsa-func.ll
+++ test/CodeGen/AMDGPU/hsa-func.ll
@@ -27,7 +27,7 @@
 
 ; ELF: Symbol {
 ; ELF: Name: simple
-; ELF: Size: 304
+; ELF: Size: 48
 ; ELF: Type: Function (0x2)
 ; ELF: }
 
@@ -40,10 +40,7 @@
 ; HSA: .globl simple
 ; HSA: .p2align 2
 ; HSA: {{^}}simple:
-; HSA: .amd_kernel_code_t
-; HSA: enable_sgpr_private_segment_buffer = 0
-; HSA: enable_sgpr_kernarg_segment_ptr = 0
-; HSA: .end_amd_kernel_code_t
+; HSA-NOT: amd_kernel_code_t
 ; HSA-NOT: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
 
 ; Make sure we are setting the ATC bit:
@@ -55,7 +52,8 @@
 
 ; HSA: .Lfunc_end0:
 ; HSA: .size simple, .Lfunc_end0-simple
-
+; HSA: ; Function info:
+; HSA-NOT: COMPUTE_PGM_RSRC2
 define void @simple(i32 addrspace(1)* addrspace(2)* %ptr.out) {
 entry:
   %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out