Index: llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
+++ llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
@@ -20,6 +20,7 @@
 
 namespace llvm {
 
+class GCNTargetMachine;
 class GCNSubtarget;
 class MachineFunction;
 class TargetMachine;
@@ -71,12 +72,16 @@
     return Info->getSecond();
   }
 
+  const SIFunctionResourceInfo &getWorstCaseResourceInfo(const Module &M);
+
 private:
-  SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF,
-                                              const TargetMachine &TM) const;
-  void propagateIndirectCallRegisterUsage();
+  void computeWorstCaseModuleRegisterUsage(const Module &M);
+
+  SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF);
 
+  const GCNTargetMachine *TM = nullptr;
   DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
+  Optional<SIFunctionResourceInfo> ModuleWorstCaseInfo;
 };
 } // namespace llvm
 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURESOURCEUSAGEANALYSIS_H
Index: llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -25,6 +25,7 @@
 
 #include "AMDGPUResourceUsageAnalysis.h"
 #include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
 #include "GCNSubtarget.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/Analysis/CallGraph.h"
@@ -102,8 +103,7 @@
   if (!TPC)
     return false;
 
-  const TargetMachine &TM = TPC->getTM<TargetMachine>();
-  bool HasIndirectCall = false;
+  TM = static_cast<const GCNTargetMachine *>(&TPC->getTM<TargetMachine>());
 
   for (CallGraphNode *I : SCC) {
     Function *F = I->getFunction();
@@ -118,19 +118,14 @@
         std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
     SIFunctionResourceInfo &Info = CI.first->second;
     assert(CI.second && "should only be called once per function");
-    Info = analyzeResourceUsage(MF, TM);
-    HasIndirectCall |= Info.HasIndirectCall;
+    Info = analyzeResourceUsage(MF);
   }
 
-  if (HasIndirectCall)
-    propagateIndirectCallRegisterUsage();
-
   return false;
 }
 
 AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
-AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
-    const MachineFunction &MF, const TargetMachine &TM) const {
+AMDGPUResourceUsageAnalysis::analyzeResourceUsage(const MachineFunction &MF) {
   SIFunctionResourceInfo Info;
 
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -476,9 +471,16 @@
             std::max(CalleeFrameSize,
                      static_cast<uint64_t>(AssumedStackSizeForExternalCall));
 
+        const SIFunctionResourceInfo &WorstCase =
+            getWorstCaseResourceInfo(*MF.getFunction().getParent());
+        MaxSGPR = std::max(WorstCase.NumExplicitSGPR - 1, MaxSGPR);
+        MaxVGPR = std::max(WorstCase.NumVGPR - 1, MaxVGPR);
+        MaxAGPR = std::max(WorstCase.NumAGPR - 1, MaxAGPR);
+
         // Register usage of indirect calls gets handled later
         Info.UsesVCC = true;
-        Info.UsesFlatScratch = ST.hasFlatAddressSpace();
+        Info.UsesFlatScratch |=
+            WorstCase.UsesFlatScratch && ST.hasFlatAddressSpace();
         Info.HasDynamicallySizedStack = true;
         Info.HasIndirectCall = true;
       } else {
@@ -507,31 +509,49 @@
   return Info;
 }
 
-void AMDGPUResourceUsageAnalysis::propagateIndirectCallRegisterUsage() {
-  // Collect the maximum number of registers from non-hardware-entrypoints.
-  // All these functions are potential targets for indirect calls.
-  int32_t NonKernelMaxSGPRs = 0;
-  int32_t NonKernelMaxVGPRs = 0;
-  int32_t NonKernelMaxAGPRs = 0;
-
-  for (const auto &I : CallGraphResourceInfo) {
-    if (!AMDGPU::isEntryFunctionCC(I.getFirst()->getCallingConv())) {
-      auto &Info = I.getSecond();
-      NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, Info.NumExplicitSGPR);
-      NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, Info.NumVGPR);
-      NonKernelMaxAGPRs = std::max(NonKernelMaxAGPRs, Info.NumAGPR);
-    }
-  }
+const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &
+AMDGPUResourceUsageAnalysis::getWorstCaseResourceInfo(const Module &M) {
+  if (ModuleWorstCaseInfo)
+    return *ModuleWorstCaseInfo;
+
+  computeWorstCaseModuleRegisterUsage(M);
+  return *ModuleWorstCaseInfo;
+}
+
+/// Find the worst case register usage for all callable functions in the
+/// module, assuming all reachable functions are defined in the current module.
+void AMDGPUResourceUsageAnalysis::computeWorstCaseModuleRegisterUsage(
+    const Module &M) {
+  assert(!ModuleWorstCaseInfo);
+  ModuleWorstCaseInfo = SIFunctionResourceInfo();
+  ModuleWorstCaseInfo->UsesVCC = true;
+  ModuleWorstCaseInfo->HasDynamicallySizedStack = true;
+  ModuleWorstCaseInfo->HasRecursion = true;
+  ModuleWorstCaseInfo->HasIndirectCall = true;
+
+  for (const Function &F : M) {
+    if (F.isIntrinsic())
+      continue;
 
-  // Add register usage for functions with indirect calls.
-  // For calls to unknown functions, we assume the maximum register usage of
-  // all non-hardware-entrypoints in the current module.
-  for (auto &I : CallGraphResourceInfo) {
-    auto &Info = I.getSecond();
-    if (Info.HasIndirectCall) {
-      Info.NumExplicitSGPR = std::max(Info.NumExplicitSGPR, NonKernelMaxSGPRs);
-      Info.NumVGPR = std::max(Info.NumVGPR, NonKernelMaxVGPRs);
-      Info.NumAGPR = std::max(Info.NumAGPR, NonKernelMaxAGPRs);
+    if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+      continue;
+
+    const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
+    const int32_t MaxVGPR = ST.getMaxNumVGPRs(F);
+    const int32_t MaxSGPR = ST.getMaxNumSGPRs(F);
+
+    ModuleWorstCaseInfo->NumVGPR =
+        std::max(ModuleWorstCaseInfo->NumVGPR, MaxVGPR);
+
+    if (ST.hasMAIInsts()) {
+      const int32_t MaxAGPR = ST.getMaxNumAGPRs(F);
+      ModuleWorstCaseInfo->NumAGPR =
+          std::max(ModuleWorstCaseInfo->NumAGPR, MaxAGPR);
     }
+
+    ModuleWorstCaseInfo->NumExplicitSGPR =
+        std::max(ModuleWorstCaseInfo->NumExplicitSGPR, MaxSGPR);
+
+    ModuleWorstCaseInfo->UsesFlatScratch |= ST.hasFlatAddressSpace();
   }
 }
Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1110,6 +1110,10 @@
   /// unit requirement.
   unsigned getMaxNumVGPRs(const Function &F) const;
 
+  unsigned getMaxNumAGPRs(const Function &F) const {
+    return getMaxNumVGPRs(F);
+  }
+
  /// \returns Maximum number of VGPRs that meets number of waves per execution
  /// unit requirement for function \p MF, or number of VGPRs explicitly
  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
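The C++ changes above replace the old two-pass propagation (collect per-function maxima, then patch up callers with indirect calls) with an up-front, per-module worst case built from each callable function's register budget. The standalone sketch below is not part of the patch; it is a toy model with made-up struct fields and numbers that only illustrates the folding order: first take the maximum budget over all non-entry functions, then let every function containing an indirect call assume that worst case.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Toy stand-in for SIFunctionResourceInfo; fields and values are illustrative
// only and do not come from any real subtarget.
struct ToyResourceInfo {
  int32_t NumVGPR = 0;          // registers the function actually uses
  int32_t NumExplicitSGPR = 0;
  int32_t MaxVGPRBudget = 0;    // registers the subtarget would let it use
  int32_t MaxSGPRBudget = 0;
  bool HasIndirectCall = false;
  bool IsEntryFunction = false;
};

int main() {
  // One kernel with an indirect call plus two ordinary callable functions.
  std::vector<ToyResourceInfo> Funcs = {
      {8, 16, 0, 0, /*HasIndirectCall=*/true, /*IsEntryFunction=*/true},
      {24, 32, 128, 102, false, false},
      {40, 48, 256, 102, false, false},
  };

  // Step 1: module-wide worst case over non-entry functions, analogous to
  // computeWorstCaseModuleRegisterUsage(). It folds in the register *budget*,
  // not the registers a function happens to use today.
  ToyResourceInfo WorstCase;
  for (const ToyResourceInfo &F : Funcs) {
    if (F.IsEntryFunction)
      continue;
    WorstCase.NumVGPR = std::max(WorstCase.NumVGPR, F.MaxVGPRBudget);
    WorstCase.NumExplicitSGPR =
        std::max(WorstCase.NumExplicitSGPR, F.MaxSGPRBudget);
  }

  // Step 2: any function containing an indirect call assumes the worst case,
  // mirroring the new code path in analyzeResourceUsage().
  for (ToyResourceInfo &F : Funcs) {
    if (!F.HasIndirectCall)
      continue;
    F.NumVGPR = std::max(F.NumVGPR, WorstCase.NumVGPR);
    F.NumExplicitSGPR = std::max(F.NumExplicitSGPR, WorstCase.NumExplicitSGPR);
  }

  // Prints "kernel reports VGPRs=256 SGPRs=102".
  std::cout << "kernel reports VGPRs=" << Funcs[0].NumVGPR
            << " SGPRs=" << Funcs[0].NumExplicitSGPR << "\n";
  return 0;
}

Against that model, the updated FileCheck expectations below track the callees' register budgets (as constrained by attributes such as "amdgpu-waves-per-eu") rather than the registers they happen to clobber.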
Index: llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
+++ llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
@@ -154,22 +154,23 @@
 declare void @undef_func()
 
 ; GCN-LABEL: {{^}}kernel_call_undef_func:
-; GFX908: .amdhsa_next_free_vgpr 32
-; GFX90A: .amdhsa_next_free_vgpr 64
-; GFX90A: .amdhsa_accum_offset 32
+; GFX908: .amdhsa_next_free_vgpr 128
+; GFX90A: .amdhsa_next_free_vgpr 512
+; GFX90A: .amdhsa_accum_offset 256
 ; GCN908: NumVgprs: 128
+; GCN908: NumAgprs: 128
 ; GCN90A: NumVgprs: 256
-; GCN: NumAgprs: 32
-; GFX908: TotalNumVgprs: 32
-; GFX90A: TotalNumVgprs: 64
-; GFX908: VGPRBlocks: 7
-; GFX90A: VGPRBlocks: 7
-; GFX908: NumVGPRsForWavesPerEU: 32
-; GFX90A: NumVGPRsForWavesPerEU: 64
-; GFX90A: AccumOffset: 32
-; GFX908: Occupancy: 8
-; GFX90A: Occupancy: 8
-; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 7
+; GCN90A: NumAgprs: 256
+; GFX908: TotalNumVgprs: 128
+; GFX90A: TotalNumVgprs: 512
+; GFX908: VGPRBlocks: 31
+; GFX90A: VGPRBlocks: 63
+; GFX908: NumVGPRsForWavesPerEU: 128
+; GFX90A: NumVGPRsForWavesPerEU: 512
+; GFX90A: AccumOffset: 256
+; GFX908: Occupancy: 2
+; GFX90A: Occupancy: 1
+; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 63
 define amdgpu_kernel void @kernel_call_undef_func() #0 {
 bb:
   call void @undef_func()
Index: llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
+++ llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
@@ -144,8 +144,8 @@
 
 ; GCN: amdpal.pipelines:
 ; GCN-NEXT: - .registers:
-; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
-; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ce{{$}}
+; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
+; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
 ; GCN-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
 ; GCN-NEXT: .shader_functions:
 ; GCN-NEXT: dynamic_stack:
@@ -178,24 +178,24 @@
 ; GCN-NEXT: .vgpr_count: 0x2{{$}}
 ; GCN-NEXT: no_stack_extern_call:
 ; GCN-NEXT: .lds_size: 0{{$}}
-; GFX8-NEXT: .sgpr_count: 0x28{{$}}
-; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
+; GFX8-NEXT: .sgpr_count: 0x68{{$}}
+; GFX9-NEXT: .sgpr_count: 0x6c{{$}}
 ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
-; GCN-NEXT: .vgpr_count: 0x29{{$}}
+; GCN-NEXT: .vgpr_count: 0x40{{$}}
 ; GCN-NEXT: no_stack_extern_call_many_args:
 ; GCN-NEXT: .lds_size: 0{{$}}
-; GFX8-NEXT: .sgpr_count: 0x28{{$}}
-; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
+; GFX8-NEXT: .sgpr_count: 0x68{{$}}
+; GFX9-NEXT: .sgpr_count: 0x6c{{$}}
 ; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
-; SDAG-NEXT: .vgpr_count: 0x2a{{$}}
-; GISEL-NEXT: .vgpr_count: 0x34{{$}}
+; SDAG-NEXT: .vgpr_count: 0x40{{$}}
+; GISEL-NEXT: .vgpr_count: 0x40{{$}}
 ; GCN-NEXT: no_stack_indirect_call:
 ; GCN-NEXT: .lds_size: 0{{$}}
-; GFX8-NEXT: .sgpr_count: 0x28{{$}}
-; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
+; GFX8-NEXT: .sgpr_count: 0x68{{$}}
+; GFX9-NEXT: .sgpr_count: 0x6c{{$}}
 ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
-; SDAG-NEXT: .vgpr_count: 0x2a{{$}}
-; GISEL-NEXT: .vgpr_count: 0x34{{$}}
+; SDAG-NEXT: .vgpr_count: 0x40{{$}}
+; GISEL-NEXT: .vgpr_count: 0x40{{$}}
 ; GCN-NEXT: simple_lds:
 ; GCN-NEXT: .lds_size: 0x100{{$}}
 ; GCN-NEXT: .sgpr_count: 0x20{{$}}
@@ -218,17 +218,17 @@
 ; GCN-NEXT: .vgpr_count: 0x3{{$}}
 ; GCN-NEXT: simple_stack_extern_call:
 ; GCN-NEXT: .lds_size: 0{{$}}
-; GFX8-NEXT: .sgpr_count: 0x28{{$}}
-; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
+; GFX8-NEXT: .sgpr_count: 0x68{{$}}
+; GFX9-NEXT: .sgpr_count: 0x6c{{$}}
 ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
-; GCN-NEXT: .vgpr_count: 0x2a{{$}}
+; GCN-NEXT: .vgpr_count: 0x40{{$}}
 ; GCN-NEXT: simple_stack_indirect_call:
 ; GCN-NEXT: .lds_size: 0{{$}}
-; GFX8-NEXT: .sgpr_count: 0x28{{$}}
-; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
+; GFX8-NEXT: .sgpr_count: 0x68{{$}}
+; GFX9-NEXT: .sgpr_count: 0x6c{{$}}
 ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
-; SDAG-NEXT: .vgpr_count: 0x2b{{$}}
-; GISEL-NEXT: .vgpr_count: 0x34{{$}}
+; SDAG-NEXT: .vgpr_count: 0x40{{$}}
+; GISEL-NEXT: .vgpr_count: 0x40{{$}}
 ; GCN-NEXT: simple_stack_recurse:
 ; GCN-NEXT: .lds_size: 0{{$}}
 ; GCN-NEXT: .sgpr_count: 0x26{{$}}
Index: llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
+++ llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
@@ -556,9 +556,9 @@
 
 ; GCN-LABEL: {{^}}f1024:
 ; GFX9: NumVgprs: 64
-; GFX90A: NumVgprs: 64
-; GFX90A: NumAgprs: 64
-; GFX90A: TotalNumVgprs: 128
+; GFX90A: NumVgprs: 128
+; GFX90A: NumAgprs: 128
+; GFX90A: TotalNumVgprs: 256
 ; GFX10WGP-WAVE32: NumVgprs: 128
 ; GFX10WGP-WAVE64: NumVgprs: 128
 ; GFX10CU-WAVE32: NumVgprs: 64
Index: llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
@@ -0,0 +1,31 @@
+; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefixes=ALL,GFX908 %s
+; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL,GFX90A %s
+
+; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
+; through aliases. If GlobalOpt is never run, we do not see direct
+; calls.
+
+@alias = hidden alias void (), void ()* @aliasee_default
+
+; ALL-LABEL: {{^}}kernel:
+; GFX908: .amdhsa_next_free_vgpr 64
+; GFX908-NEXT: .amdhsa_next_free_sgpr 102
+
+; GFX90A: .amdhsa_next_free_vgpr 256
+; GFX90A-NEXT: .amdhsa_next_free_sgpr 102
+; GFX90A-NEXT: .amdhsa_accum_offset 128
+define amdgpu_kernel void @kernel() #0 {
+bb:
+  call void @alias() #2
+  ret void
+}
+
+define internal void @aliasee_default() #1 {
+bb:
+  call void asm sideeffect "; clobber a26 ", "~{a26}"()
+  ret void
+}
+
+attributes #0 = { noinline norecurse nounwind optnone }
+attributes #1 = { noinline norecurse nounwind readnone willreturn }
+attributes #2 = { nounwind readnone willreturn }
Index: llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
@@ -0,0 +1,26 @@
+; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
+; through aliases. If GlobalOpt is never run, we do not see direct
+; calls.
+
+@alias0 = hidden alias void (), void ()* @aliasee_default_vgpr64_sgpr102
+
+; CHECK-LABEL: {{^}}kernel0:
+; CHECK: .amdhsa_next_free_vgpr 64
+; CHECK-NEXT: .amdhsa_next_free_sgpr 102
+define amdgpu_kernel void @kernel0() #0 {
+bb:
+  call void @alias0() #2
+  ret void
+}
+
+define internal void @aliasee_default_vgpr64_sgpr102() #1 {
+bb:
+  call void asm sideeffect "; clobber v52 ", "~{v52}"()
+  ret void
+}
+
+attributes #0 = { noinline norecurse nounwind optnone }
+attributes #1 = { noinline norecurse nounwind readnone willreturn }
+attributes #2 = { nounwind readnone willreturn }
Index: llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
@@ -0,0 +1,29 @@
+; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
+; through aliases. If GlobalOpt is never run, we do not see direct
+; calls.
+
+@alias1 = hidden alias void (), void ()* @aliasee_vgpr32_sgpr76
+
+; The parent kernel has a higher VGPR usage than the possible callees.
+
+; CHECK-LABEL: {{^}}kernel1:
+; CHECK: .amdhsa_next_free_vgpr 42
+; CHECK-NEXT: .amdhsa_next_free_sgpr 74
+define amdgpu_kernel void @kernel1() #0 {
+bb:
+  call void asm sideeffect "; clobber v40 ", "~{v40}"()
+  call void @alias1() #2
+  ret void
+}
+
+define internal void @aliasee_vgpr32_sgpr76() #1 {
+bb:
+  call void asm sideeffect "; clobber v26 ", "~{v26}"()
+  ret void
+}
+
+attributes #0 = { noinline norecurse nounwind optnone }
+attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="8,10" }
+attributes #2 = { nounwind readnone willreturn }
Index: llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
@@ -0,0 +1,26 @@
+; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
+; through aliases. If GlobalOpt is never run, we do not see direct
+; calls.
+
+@alias2 = hidden alias void (), void ()* @aliasee_vgpr64_sgpr102
+
+; CHECK-LABEL: {{^}}kernel2:
+; CHECK: .amdhsa_next_free_vgpr 64
+; CHECK-NEXT: .amdhsa_next_free_sgpr 102
+define amdgpu_kernel void @kernel2() #0 {
+bb:
+  call void @alias2() #2
+  ret void
+}
+
+define internal void @aliasee_vgpr64_sgpr102() #1 {
+bb:
+  call void asm sideeffect "; clobber v52 ", "~{v52}"()
+  ret void
+}
+
+attributes #0 = { noinline norecurse nounwind optnone }
+attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="4,10" }
+attributes #2 = { nounwind readnone willreturn }
Index: llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
@@ -0,0 +1,26 @@
+; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
+; through aliases. If GlobalOpt is never run, we do not see direct
+; calls.
+
+@alias3 = hidden alias void (), void ()* @aliasee_vgpr256_sgpr102
+
+; CHECK-LABEL: {{^}}kernel3:
+; CHECK: .amdhsa_next_free_vgpr 256
+; CHECK-NEXT: .amdhsa_next_free_sgpr 102
+define amdgpu_kernel void @kernel3() #0 {
+bb:
+  call void @alias3() #2
+  ret void
+}
+
+define internal void @aliasee_vgpr256_sgpr102() #1 {
+bb:
+  call void asm sideeffect "; clobber v252 ", "~{v252}"()
+  ret void
+}
+
+attributes #0 = { noinline norecurse nounwind optnone }
+attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,1" }
+attributes #2 = { nounwind readnone willreturn }
Index: llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -227,10 +227,10 @@
 ; Make sure there's no assert when a sgpr96 is used.
 ; GCN-LABEL: {{^}}count_use_sgpr96_external_call
 ; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}]
-; CI: NumSgprs: 84
-; VI-NOBUG: NumSgprs: 86
+; CI: NumSgprs: 104
+; VI-NOBUG: NumSgprs: 108
 ; VI-BUG: NumSgprs: 96
-; GCN: NumVgprs: 50
+; GCN: NumVgprs: 64
 define amdgpu_kernel void @count_use_sgpr96_external_call() {
 entry:
   tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> ) #1
@@ -241,10 +241,10 @@
 ; Make sure there's no assert when a sgpr160 is used.
 ; GCN-LABEL: {{^}}count_use_sgpr160_external_call
 ; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}]
-; CI: NumSgprs: 84
-; VI-NOBUG: NumSgprs: 86
+; CI: NumSgprs: 104
+; VI-NOBUG: NumSgprs: 108
 ; VI-BUG: NumSgprs: 96
-; GCN: NumVgprs: 50
+; GCN: NumVgprs: 64
 define amdgpu_kernel void @count_use_sgpr160_external_call() {
 entry:
   tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> ) #1
@@ -255,10 +255,10 @@
 ; Make sure there's no assert when a vgpr160 is used.
 ; GCN-LABEL: {{^}}count_use_vgpr160_external_call
 ; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}]
-; CI: NumSgprs: 84
-; VI-NOBUG: NumSgprs: 86
+; CI: NumSgprs: 104
+; VI-NOBUG: NumSgprs: 108
 ; VI-BUG: NumSgprs: 96
-; GCN: NumVgprs: 50
+; GCN: NumVgprs: 64
 define amdgpu_kernel void @count_use_vgpr160_external_call() {
 entry:
   tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> ) #1
Index: llvm/test/CodeGen/AMDGPU/indirect-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -16,8 +16,8 @@
 ; GCN-NEXT: amd_machine_version_stepping = 0
 ; GCN-NEXT: kernel_code_entry_byte_offset = 256
 ; GCN-NEXT: kernel_code_prefetch_byte_size = 0
-; GCN-NEXT: granulated_workitem_vgpr_count = 7
-; GCN-NEXT: granulated_wavefront_sgpr_count = 4
+; GCN-NEXT: granulated_workitem_vgpr_count = 15
+; GCN-NEXT: granulated_wavefront_sgpr_count = 12
 ; GCN-NEXT: priority = 0
 ; GCN-NEXT: float_mode = 240
 ; GCN-NEXT: priv = 0
@@ -60,8 +60,8 @@
 ; GCN-NEXT: gds_segment_byte_size = 0
 ; GCN-NEXT: kernarg_segment_byte_size = 64
 ; GCN-NEXT: workgroup_fbarrier_count = 0
-; GCN-NEXT: wavefront_sgpr_count = 37
-; GCN-NEXT: workitem_vgpr_count = 32
+; GCN-NEXT: wavefront_sgpr_count = 104
+; GCN-NEXT: workitem_vgpr_count = 64
 ; GCN-NEXT: reserved_vgpr_first = 0
 ; GCN-NEXT: reserved_vgpr_count = 0
 ; GCN-NEXT: reserved_sgpr_first = 0
@@ -109,8 +109,8 @@
 ; GISEL-NEXT: amd_machine_version_stepping = 0
 ; GISEL-NEXT: kernel_code_entry_byte_offset = 256
 ; GISEL-NEXT: kernel_code_prefetch_byte_size = 0
-; GISEL-NEXT: granulated_workitem_vgpr_count = 7
-; GISEL-NEXT: granulated_wavefront_sgpr_count = 4
+; GISEL-NEXT: granulated_workitem_vgpr_count = 15
+; GISEL-NEXT: granulated_wavefront_sgpr_count = 12
 ; GISEL-NEXT: priority = 0
 ; GISEL-NEXT: float_mode = 240
 ; GISEL-NEXT: priv = 0
@@ -153,8 +153,8 @@
 ; GISEL-NEXT: gds_segment_byte_size = 0
 ; GISEL-NEXT: kernarg_segment_byte_size = 64
 ; GISEL-NEXT: workgroup_fbarrier_count = 0
-; GISEL-NEXT: wavefront_sgpr_count = 37
-; GISEL-NEXT: workitem_vgpr_count = 32
+; GISEL-NEXT: wavefront_sgpr_count = 104
+; GISEL-NEXT: workitem_vgpr_count = 64
 ; GISEL-NEXT: reserved_vgpr_first = 0
 ; GISEL-NEXT: reserved_vgpr_count = 0
 ; GISEL-NEXT: reserved_sgpr_first = 0
@@ -207,8 +207,8 @@
 ; GCN-NEXT: amd_machine_version_stepping = 0
 ; GCN-NEXT: kernel_code_entry_byte_offset = 256
 ; GCN-NEXT: kernel_code_prefetch_byte_size = 0
-; GCN-NEXT: granulated_workitem_vgpr_count = 7
-; GCN-NEXT: granulated_wavefront_sgpr_count = 4
+; GCN-NEXT: granulated_workitem_vgpr_count = 15
+; GCN-NEXT: granulated_wavefront_sgpr_count = 12
 ; GCN-NEXT: priority = 0
 ; GCN-NEXT: float_mode = 240
 ; GCN-NEXT: priv = 0
@@ -251,8 +251,8 @@
 ; GCN-NEXT: gds_segment_byte_size = 0
 ; GCN-NEXT: kernarg_segment_byte_size = 64
 ; GCN-NEXT: workgroup_fbarrier_count = 0
-; GCN-NEXT: wavefront_sgpr_count = 37
-; GCN-NEXT: workitem_vgpr_count = 32
+; GCN-NEXT: wavefront_sgpr_count = 104
+; GCN-NEXT: workitem_vgpr_count = 64
 ; GCN-NEXT: reserved_vgpr_first = 0
 ; GCN-NEXT: reserved_vgpr_count = 0
 ; GCN-NEXT: reserved_sgpr_first = 0
@@ -301,8 +301,8 @@
 ; GISEL-NEXT: amd_machine_version_stepping = 0
 ; GISEL-NEXT: kernel_code_entry_byte_offset = 256
 ; GISEL-NEXT: kernel_code_prefetch_byte_size = 0
-; GISEL-NEXT: granulated_workitem_vgpr_count = 7
-; GISEL-NEXT: granulated_wavefront_sgpr_count = 4
+; GISEL-NEXT: granulated_workitem_vgpr_count = 15
+; GISEL-NEXT: granulated_wavefront_sgpr_count = 12
 ; GISEL-NEXT: priority = 0
 ; GISEL-NEXT: float_mode = 240
 ; GISEL-NEXT: priv = 0
@@ -345,8 +345,8 @@
 ; GISEL-NEXT: gds_segment_byte_size = 0
 ; GISEL-NEXT: kernarg_segment_byte_size = 64
 ; GISEL-NEXT: workgroup_fbarrier_count = 0
-; GISEL-NEXT: wavefront_sgpr_count = 37
-; GISEL-NEXT: workitem_vgpr_count = 32
+; GISEL-NEXT: wavefront_sgpr_count = 104
+; GISEL-NEXT: workitem_vgpr_count = 64
 ; GISEL-NEXT: reserved_vgpr_first = 0
 ; GISEL-NEXT: reserved_vgpr_count = 0
 ; GISEL-NEXT: reserved_sgpr_first = 0