diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -27,6 +27,7 @@
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -105,15 +106,25 @@
   const TargetMachine &TM = TPC->getTM();
   bool HasIndirectCall = false;
 
-  for (Function &F : M) {
-    if (F.isDeclaration())
+  // Walk the call graph in post-order instead of module order, so that
+  // callees are generally visited before their callers no matter where the
+  // functions appear in the module.
+  // NOTE(review): the traversal starts at the call graph's entry node, so
+  // functions that are not reachable from it are never visited by this
+  // loop -- confirm that this is acceptable.
+  CallGraph CG = CallGraph(M);
+  auto End = po_end(&CG);
+
+  for (auto IT = po_begin(&CG); IT != End; ++IT) {
+    Function *F = IT->getFunction();
+    if (!F || F->isDeclaration())
       continue;
 
-    MachineFunction *MF = MMI.getMachineFunction(F);
+    MachineFunction *MF = MMI.getMachineFunction(*F);
     assert(MF && "function must have been generated already");
 
     auto CI = CallGraphResourceInfo.insert(
-        std::make_pair(&F, SIFunctionResourceInfo()));
+        std::make_pair(F, SIFunctionResourceInfo()));
     SIFunctionResourceInfo &Info = CI.first->second;
     assert(CI.second && "should only be called once per function");
     Info = analyzeResourceUsage(*MF, TM);
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
+; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
+; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
+; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx1010 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
+
+; Verify that the metadata emitted for a kernel is the same whether the
+; function it calls is defined before or after the kernel in the module.
+
+; CHECK: amdhsa.kernels:
+
+; test a kernel that occurs before its callee in the module
+; CHECK: - .args:
+; CHECK: .private_segment_fixed_size: 0
+define amdgpu_kernel void @test1() {
+  call void @f()
+  ret void
+}
+
+define void @f() #0 {
+  ret void
+}
+
+; test a kernel that occurs after its callee in the module
+; CHECK: - .args:
+; CHECK: .private_segment_fixed_size: 0
+define amdgpu_kernel void @test2() {
+  call void @f()
+  ret void
+}
+
+attributes #0 = { norecurse }