Index: llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h +++ llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h @@ -20,12 +20,11 @@ namespace llvm { -class GCNTargetMachine; class GCNSubtarget; class MachineFunction; class TargetMachine; -struct AMDGPUResourceUsageAnalysis : public CallGraphSCCPass { +struct AMDGPUResourceUsageAnalysis : public ModulePass { static char ID; public: @@ -51,15 +50,15 @@ int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const; }; - AMDGPUResourceUsageAnalysis() : CallGraphSCCPass(ID) {} + AMDGPUResourceUsageAnalysis() : ModulePass(ID) {} - bool runOnSCC(CallGraphSCC &SCC) override; - - bool doInitialization(CallGraph &CG) override { + bool doInitialization(Module &M) override { CallGraphResourceInfo.clear(); - return CallGraphSCCPass::doInitialization(CG); + return ModulePass::doInitialization(M); } + bool runOnModule(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.setPreservesAll(); @@ -72,16 +71,12 @@ return Info->getSecond(); } - const SIFunctionResourceInfo &getWorstCaseResourceInfo(const Module &M); - private: - void computeWorstCaseModuleRegisterUsage(const Module &M); - - SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF); + SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF, + const TargetMachine &TM) const; + void propagateIndirectCallRegisterUsage(); - const GCNTargetMachine *TM = nullptr; DenseMap CallGraphResourceInfo; - Optional ModuleWorstCaseInfo; }; } // namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURESOURCEUSAGEANALYSIS_H Index: llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -25,7 +25,6 @@ #include "AMDGPUResourceUsageAnalysis.h" #include "AMDGPU.h" -#include "AMDGPUTargetMachine.h" #include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "llvm/Analysis/CallGraph.h" @@ -98,34 +97,39 @@ return getTotalNumVGPRs(ST, NumAGPR, NumVGPR); } -bool AMDGPUResourceUsageAnalysis::runOnSCC(CallGraphSCC &SCC) { +bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) { auto *TPC = getAnalysisIfAvailable(); if (!TPC) return false; - TM = static_cast(&TPC->getTM()); + MachineModuleInfo &MMI = getAnalysis().getMMI(); + const TargetMachine &TM = TPC->getTM(); + bool HasIndirectCall = false; - for (CallGraphNode *I : SCC) { - Function *F = I->getFunction(); - if (!F || F->isDeclaration()) + for (Function &F : M) { + if (F.isDeclaration()) continue; - MachineModuleInfo &MMI = - getAnalysis().getMMI(); - MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); + MachineFunction *MF = MMI.getMachineFunction(F); + assert(MF && "function must have been generated already"); auto CI = CallGraphResourceInfo.insert( - std::make_pair(&MF.getFunction(), SIFunctionResourceInfo())); + std::make_pair(&F, SIFunctionResourceInfo())); SIFunctionResourceInfo &Info = CI.first->second; assert(CI.second && "should only be called once per function"); - Info = analyzeResourceUsage(MF); + Info = analyzeResourceUsage(*MF, TM); + HasIndirectCall |= Info.HasIndirectCall; } + if (HasIndirectCall) + propagateIndirectCallRegisterUsage(); + return false; } AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo -AMDGPUResourceUsageAnalysis::analyzeResourceUsage(const MachineFunction &MF) { +AMDGPUResourceUsageAnalysis::analyzeResourceUsage( + const MachineFunction &MF, const TargetMachine &TM) const { SIFunctionResourceInfo Info; const SIMachineFunctionInfo *MFI = MF.getInfo(); @@ -471,16 +475,9 @@ std::max(CalleeFrameSize, static_cast(AssumedStackSizeForExternalCall)); - const SIFunctionResourceInfo &WorstCase = - getWorstCaseResourceInfo(*MF.getFunction().getParent()); - MaxSGPR = std::max(WorstCase.NumExplicitSGPR - 1, MaxSGPR); - MaxVGPR = std::max(WorstCase.NumVGPR - 1, MaxVGPR); - MaxAGPR = std::max(WorstCase.NumAGPR - 1, MaxAGPR); - // Register usage of indirect calls gets handled later Info.UsesVCC = true; - Info.UsesFlatScratch |= - WorstCase.UsesFlatScratch && ST.hasFlatAddressSpace(); + Info.UsesFlatScratch = ST.hasFlatAddressSpace(); Info.HasDynamicallySizedStack = true; Info.HasIndirectCall = true; } else { @@ -509,49 +506,31 @@ return Info; } -const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo & -AMDGPUResourceUsageAnalysis::getWorstCaseResourceInfo(const Module &M) { - if (ModuleWorstCaseInfo) - return *ModuleWorstCaseInfo; - - computeWorstCaseModuleRegisterUsage(M); - return *ModuleWorstCaseInfo; -} - -/// Find the worst case register usage for all callable functions in the module, -/// assuming all reachable functions are defined in the current module. -void AMDGPUResourceUsageAnalysis::computeWorstCaseModuleRegisterUsage( - const Module &M) { - assert(!ModuleWorstCaseInfo); - ModuleWorstCaseInfo = SIFunctionResourceInfo(); - ModuleWorstCaseInfo->UsesVCC = true; - ModuleWorstCaseInfo->HasDynamicallySizedStack = true; - ModuleWorstCaseInfo->HasRecursion = true; - ModuleWorstCaseInfo->HasIndirectCall = true; - - for (const Function &F : M) { - if (F.isIntrinsic()) - continue; - - if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) - continue; - - const GCNSubtarget &ST = TM->getSubtarget(F); - const int32_t MaxVGPR = ST.getMaxNumVGPRs(F); - const int32_t MaxSGPR = ST.getMaxNumSGPRs(F); - - ModuleWorstCaseInfo->NumVGPR = - std::max(ModuleWorstCaseInfo->NumVGPR, MaxVGPR); - - if (ST.hasMAIInsts()) { - const int32_t MaxAGPR = ST.getMaxNumAGPRs(F); - ModuleWorstCaseInfo->NumAGPR = - std::max(ModuleWorstCaseInfo->NumAGPR, MaxAGPR); +void AMDGPUResourceUsageAnalysis::propagateIndirectCallRegisterUsage() { + // Collect the maximum number of registers from non-hardware-entrypoints. + // All these functions are potential targets for indirect calls. + int32_t NonKernelMaxSGPRs = 0; + int32_t NonKernelMaxVGPRs = 0; + int32_t NonKernelMaxAGPRs = 0; + + for (const auto &I : CallGraphResourceInfo) { + if (!AMDGPU::isEntryFunctionCC(I.getFirst()->getCallingConv())) { + auto &Info = I.getSecond(); + NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, Info.NumExplicitSGPR); + NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, Info.NumVGPR); + NonKernelMaxAGPRs = std::max(NonKernelMaxAGPRs, Info.NumAGPR); } + } - ModuleWorstCaseInfo->NumExplicitSGPR = - std::max(ModuleWorstCaseInfo->NumExplicitSGPR, MaxSGPR); - - ModuleWorstCaseInfo->UsesFlatScratch |= ST.hasFlatAddressSpace(); + // Add register usage for functions with indirect calls. + // For calls to unknown functions, we assume the maximum register usage of + // all non-hardware-entrypoints in the current module. + for (auto &I : CallGraphResourceInfo) { + auto &Info = I.getSecond(); + if (Info.HasIndirectCall) { + Info.NumExplicitSGPR = std::max(Info.NumExplicitSGPR, NonKernelMaxSGPRs); + Info.NumVGPR = std::max(Info.NumVGPR, NonKernelMaxVGPRs); + Info.NumAGPR = std::max(Info.NumAGPR, NonKernelMaxAGPRs); + } } } Index: llvm/test/CodeGen/AMDGPU/agpr-register-count.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/agpr-register-count.ll +++ llvm/test/CodeGen/AMDGPU/agpr-register-count.ll @@ -154,23 +154,23 @@ declare void @undef_func() ; GCN-LABEL: {{^}}kernel_call_undef_func: -; GFX908: .amdhsa_next_free_vgpr 128 -; GFX90A: .amdhsa_next_free_vgpr 512 -; GFX90A: .amdhsa_accum_offset 256 +; GFX908: .amdhsa_next_free_vgpr 32 +; GFX90A: .amdhsa_next_free_vgpr 64 +; GFX90A: .amdhsa_accum_offset 32 ; GCN908: NumVgprs: 128 ; GCN908: NumAgprs: 128 ; GCN90A: NumVgprs: 256 ; GCN90A: NumAgprs: 256 -; GFX908: TotalNumVgprs: 128 -; GFX90A: TotalNumVgprs: 512 -; GFX908: VGPRBlocks: 31 -; GFX90A: VGPRBlocks: 63 -; GFX908: NumVGPRsForWavesPerEU: 128 -; GFX90A: NumVGPRsForWavesPerEU: 512 -; GFX90A: AccumOffset: 256 -; GFX908: Occupancy: 2 -; GFX90A: Occupancy: 1 -; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 63 +; GFX908: TotalNumVgprs: 32 +; GFX90A: TotalNumVgprs: 64 +; GFX908: VGPRBlocks: 7 +; GFX90A: VGPRBlocks: 7 +; GFX908: NumVGPRsForWavesPerEU: 32 +; GFX90A: NumVGPRsForWavesPerEU: 64 +; GFX90A: AccumOffset: 32 +; GFX908: Occupancy: 8 +; GFX90A: Occupancy: 8 +; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 7 define amdgpu_kernel void @kernel_call_undef_func() #0 { bb: call void @undef_func() Index: llvm/test/CodeGen/AMDGPU/amdpal-callable.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -144,8 +144,8 @@ ; GCN: amdpal.pipelines: ; GCN-NEXT: - .registers: -; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}} -; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}} +; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}} +; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ce{{$}} ; GCN-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}} ; GCN-NEXT: .shader_functions: ; GCN-NEXT: dynamic_stack: @@ -178,24 +178,25 @@ ; GCN-NEXT: .vgpr_count: 0x2{{$}} ; GCN-NEXT: no_stack_extern_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GFX8-NEXT: .sgpr_count: 0x68{{$}} -; GFX9-NEXT: .sgpr_count: 0x6c{{$}} +; GFX8-NEXT: .sgpr_count: 0x28{{$}} +; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} -; GCN-NEXT: .vgpr_count: 0x40{{$}} +; SDAG-NEXT: .vgpr_count: 0x2b{{$}} +; GISEL-NEXT: .vgpr_count: 0x34{{$}} ; GCN-NEXT: no_stack_extern_call_many_args: ; GCN-NEXT: .lds_size: 0{{$}} -; GFX8-NEXT: .sgpr_count: 0x68{{$}} -; GFX9-NEXT: .sgpr_count: 0x6c{{$}} +; GFX8-NEXT: .sgpr_count: 0x28{{$}} +; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}} -; SDAG-NEXT: .vgpr_count: 0x40{{$}} -; GISEL-NEXT: .vgpr_count: 0x40{{$}} +; SDAG-NEXT: .vgpr_count: 0x2b{{$}} +; GISEL-NEXT: .vgpr_count: 0x34{{$}} ; GCN-NEXT: no_stack_indirect_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GFX8-NEXT: .sgpr_count: 0x68{{$}} -; GFX9-NEXT: .sgpr_count: 0x6c{{$}} +; GFX8-NEXT: .sgpr_count: 0x28{{$}} +; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} -; SDAG-NEXT: .vgpr_count: 0x40{{$}} -; GISEL-NEXT: .vgpr_count: 0x40{{$}} +; SDAG-NEXT: .vgpr_count: 0x2b{{$}} +; GISEL-NEXT: .vgpr_count: 0x34{{$}} ; GCN-NEXT: simple_lds: ; GCN-NEXT: .lds_size: 0x100{{$}} ; GCN-NEXT: .sgpr_count: 0x20{{$}} @@ -218,17 +219,18 @@ ; GCN-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: simple_stack_extern_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GFX8-NEXT: .sgpr_count: 0x68{{$}} -; GFX9-NEXT: .sgpr_count: 0x6c{{$}} +; GFX8-NEXT: .sgpr_count: 0x28{{$}} +; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} -; GCN-NEXT: .vgpr_count: 0x40{{$}} +; SDAG-NEXT: .vgpr_count: 0x2b{{$}} +; GISEL-NEXT: .vgpr_count: 0x34{{$}} ; GCN-NEXT: simple_stack_indirect_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GFX8-NEXT: .sgpr_count: 0x68{{$}} -; GFX9-NEXT: .sgpr_count: 0x6c{{$}} +; GFX8-NEXT: .sgpr_count: 0x28{{$}} +; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} -; SDAG-NEXT: .vgpr_count: 0x40{{$}} -; GISEL-NEXT: .vgpr_count: 0x40{{$}} +; SDAG-NEXT: .vgpr_count: 0x2b{{$}} +; GISEL-NEXT: .vgpr_count: 0x34{{$}} ; GCN-NEXT: simple_stack_recurse: ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x26{{$}} Index: llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll +++ llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll @@ -556,9 +556,8 @@ ; GCN-LABEL: {{^}}f1024: ; GFX9: NumVgprs: 64 -; GFX90A: NumVgprs: 128 -; GFX90A: NumAgprs: 128 -; GFX90A: TotalNumVgprs: 256 +; GFX90A: NumAgprs: 64 +; GFX90A: TotalNumVgprs: 128 ; GFX10WGP-WAVE32: NumVgprs: 128 ; GFX10WGP-WAVE64: NumVgprs: 128 ; GFX10CU-WAVE32: NumVgprs: 64 Index: llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll +++ llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll @@ -8,12 +8,12 @@ @alias = hidden alias void (), void ()* @aliasee_default ; ALL-LABEL: {{^}}kernel: -; GFX908: .amdhsa_next_free_vgpr 64 -; GFX908-NEXT: .amdhsa_next_free_sgpr 102 +; GFX908: .amdhsa_next_free_vgpr 41 +; GFX908-NEXT: .amdhsa_next_free_sgpr 33 -; GFX90A: .amdhsa_next_free_vgpr 256 -; GFX90A-NEXT: .amdhsa_next_free_sgpr 102 -; GFX90A-NEXT: .amdhsa_accum_offset 128 +; GFX90A: .amdhsa_next_free_vgpr 71 +; GFX90A-NEXT: .amdhsa_next_free_sgpr 33 +; GFX90A-NEXT: .amdhsa_accum_offset 44 define amdgpu_kernel void @kernel() #0 { bb: call void @alias() #2 Index: llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll +++ llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll @@ -7,8 +7,8 @@ @alias0 = hidden alias void (), void ()* @aliasee_default_vgpr64_sgpr102 ; CHECK-LABEL: {{^}}kernel0: -; CHECK: .amdhsa_next_free_vgpr 64 -; CHECK-NEXT: .amdhsa_next_free_sgpr 102 +; CHECK: .amdhsa_next_free_vgpr 53 +; CHECK-NEXT: .amdhsa_next_free_sgpr 33 define amdgpu_kernel void @kernel0() #0 { bb: call void @alias0() #2 Index: llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll +++ llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll @@ -10,7 +10,7 @@ ; CHECK-LABEL: {{^}}kernel1: ; CHECK: .amdhsa_next_free_vgpr 42 -; CHECK-NEXT: .amdhsa_next_free_sgpr 74 +; CHECK-NEXT: .amdhsa_next_free_sgpr 33 define amdgpu_kernel void @kernel1() #0 { bb: call void asm sideeffect "; clobber v40 ", "~{v40}"() Index: llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll +++ llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll @@ -7,8 +7,8 @@ @alias2 = hidden alias void (), void()* @aliasee_vgpr64_sgpr102 ; CHECK-LABEL: {{^}}kernel2: -; CHECK: .amdhsa_next_free_vgpr 64 -; CHECK-NEXT: .amdhsa_next_free_sgpr 102 +; CHECK: .amdhsa_next_free_vgpr 53 +; CHECK-NEXT: .amdhsa_next_free_sgpr 33 define amdgpu_kernel void @kernel2() #0 { bb: call void @alias2() #2 Index: llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll +++ llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll @@ -7,8 +7,8 @@ @alias3 = hidden alias void (), void ()* @aliasee_vgpr256_sgpr102 ; CHECK-LABEL: {{^}}kernel3: -; CHECK: .amdhsa_next_free_vgpr 256 -; CHECK-NEXT: .amdhsa_next_free_sgpr 102 +; CHECK: .amdhsa_next_free_vgpr 253 +; CHECK-NEXT: .amdhsa_next_free_sgpr 33 define amdgpu_kernel void @kernel3() #0 { bb: call void @alias3() #2 Index: llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -227,10 +227,10 @@ ; Make sure there's no assert when a sgpr96 is used. ; GCN-LABEL: {{^}}count_use_sgpr96_external_call ; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}] -; CI: NumSgprs: 104 -; VI-NOBUG: NumSgprs: 108 +; CI: NumSgprs: 84 +; VI-NOBUG: NumSgprs: 86 ; VI-BUG: NumSgprs: 96 -; GCN: NumVgprs: 64 +; GCN: NumVgprs: 50 define amdgpu_kernel void @count_use_sgpr96_external_call() { entry: tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> ) #1 @@ -241,10 +241,10 @@ ; Make sure there's no assert when a sgpr160 is used. ; GCN-LABEL: {{^}}count_use_sgpr160_external_call ; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}] -; CI: NumSgprs: 104 -; VI-NOBUG: NumSgprs: 108 +; CI: NumSgprs: 84 +; VI-NOBUG: NumSgprs: 86 ; VI-BUG: NumSgprs: 96 -; GCN: NumVgprs: 64 +; GCN: NumVgprs: 50 define amdgpu_kernel void @count_use_sgpr160_external_call() { entry: tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> ) #1 @@ -255,10 +255,10 @@ ; Make sure there's no assert when a vgpr160 is used. ; GCN-LABEL: {{^}}count_use_vgpr160_external_call ; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}] -; CI: NumSgprs: 104 -; VI-NOBUG: NumSgprs: 108 +; CI: NumSgprs: 84 +; VI-NOBUG: NumSgprs: 86 ; VI-BUG: NumSgprs: 96 -; GCN: NumVgprs: 64 +; GCN: NumVgprs: 50 define amdgpu_kernel void @count_use_vgpr160_external_call() { entry: tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> ) #1 Index: llvm/test/CodeGen/AMDGPU/indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -16,8 +16,8 @@ ; GCN-NEXT: amd_machine_version_stepping = 0 ; GCN-NEXT: kernel_code_entry_byte_offset = 256 ; GCN-NEXT: kernel_code_prefetch_byte_size = 0 -; GCN-NEXT: granulated_workitem_vgpr_count = 15 -; GCN-NEXT: granulated_wavefront_sgpr_count = 12 +; GCN-NEXT: granulated_workitem_vgpr_count = 10 +; GCN-NEXT: granulated_wavefront_sgpr_count = 8 ; GCN-NEXT: priority = 0 ; GCN-NEXT: float_mode = 240 ; GCN-NEXT: priv = 0 @@ -60,8 +60,8 @@ ; GCN-NEXT: gds_segment_byte_size = 0 ; GCN-NEXT: kernarg_segment_byte_size = 64 ; GCN-NEXT: workgroup_fbarrier_count = 0 -; GCN-NEXT: wavefront_sgpr_count = 104 -; GCN-NEXT: workitem_vgpr_count = 64 +; GCN-NEXT: wavefront_sgpr_count = 68 +; GCN-NEXT: workitem_vgpr_count = 42 ; GCN-NEXT: reserved_vgpr_first = 0 ; GCN-NEXT: reserved_vgpr_count = 0 ; GCN-NEXT: reserved_sgpr_first = 0 @@ -109,8 +109,8 @@ ; GISEL-NEXT: amd_machine_version_stepping = 0 ; GISEL-NEXT: kernel_code_entry_byte_offset = 256 ; GISEL-NEXT: kernel_code_prefetch_byte_size = 0 -; GISEL-NEXT: granulated_workitem_vgpr_count = 15 -; GISEL-NEXT: granulated_wavefront_sgpr_count = 12 +; GISEL-NEXT: granulated_workitem_vgpr_count = 10 +; GISEL-NEXT: granulated_wavefront_sgpr_count = 8 ; GISEL-NEXT: priority = 0 ; GISEL-NEXT: float_mode = 240 ; GISEL-NEXT: priv = 0 @@ -153,8 +153,8 @@ ; GISEL-NEXT: gds_segment_byte_size = 0 ; GISEL-NEXT: kernarg_segment_byte_size = 64 ; GISEL-NEXT: workgroup_fbarrier_count = 0 -; GISEL-NEXT: wavefront_sgpr_count = 104 -; GISEL-NEXT: workitem_vgpr_count = 64 +; GISEL-NEXT: wavefront_sgpr_count = 68 +; GISEL-NEXT: workitem_vgpr_count = 42 ; GISEL-NEXT: reserved_vgpr_first = 0 ; GISEL-NEXT: reserved_vgpr_count = 0 ; GISEL-NEXT: reserved_sgpr_first = 0 @@ -207,8 +207,8 @@ ; GCN-NEXT: amd_machine_version_stepping = 0 ; GCN-NEXT: kernel_code_entry_byte_offset = 256 ; GCN-NEXT: kernel_code_prefetch_byte_size = 0 -; GCN-NEXT: granulated_workitem_vgpr_count = 15 -; GCN-NEXT: granulated_wavefront_sgpr_count = 12 +; GCN-NEXT: granulated_workitem_vgpr_count = 10 +; GCN-NEXT: granulated_wavefront_sgpr_count = 8 ; GCN-NEXT: priority = 0 ; GCN-NEXT: float_mode = 240 ; GCN-NEXT: priv = 0 @@ -251,8 +251,8 @@ ; GCN-NEXT: gds_segment_byte_size = 0 ; GCN-NEXT: kernarg_segment_byte_size = 64 ; GCN-NEXT: workgroup_fbarrier_count = 0 -; GCN-NEXT: wavefront_sgpr_count = 104 -; GCN-NEXT: workitem_vgpr_count = 64 +; GCN-NEXT: wavefront_sgpr_count = 68 +; GCN-NEXT: workitem_vgpr_count = 42 ; GCN-NEXT: reserved_vgpr_first = 0 ; GCN-NEXT: reserved_vgpr_count = 0 ; GCN-NEXT: reserved_sgpr_first = 0 @@ -301,8 +301,8 @@ ; GISEL-NEXT: amd_machine_version_stepping = 0 ; GISEL-NEXT: kernel_code_entry_byte_offset = 256 ; GISEL-NEXT: kernel_code_prefetch_byte_size = 0 -; GISEL-NEXT: granulated_workitem_vgpr_count = 15 -; GISEL-NEXT: granulated_wavefront_sgpr_count = 12 +; GISEL-NEXT: granulated_workitem_vgpr_count = 10 +; GISEL-NEXT: granulated_wavefront_sgpr_count = 8 ; GISEL-NEXT: priority = 0 ; GISEL-NEXT: float_mode = 240 ; GISEL-NEXT: priv = 0 @@ -345,8 +345,8 @@ ; GISEL-NEXT: gds_segment_byte_size = 0 ; GISEL-NEXT: kernarg_segment_byte_size = 64 ; GISEL-NEXT: workgroup_fbarrier_count = 0 -; GISEL-NEXT: wavefront_sgpr_count = 104 -; GISEL-NEXT: workitem_vgpr_count = 64 +; GISEL-NEXT: wavefront_sgpr_count = 68 +; GISEL-NEXT: workitem_vgpr_count = 42 ; GISEL-NEXT: reserved_vgpr_first = 0 ; GISEL-NEXT: reserved_vgpr_count = 0 ; GISEL-NEXT: reserved_sgpr_first = 0 Index: llvm/test/CodeGen/AMDGPU/ipra.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ipra.ll +++ llvm/test/CodeGen/AMDGPU/ipra.ll @@ -91,6 +91,7 @@ ret void } +; GCN-LABEL: {{^}}void_func_void: define void @void_func_void() noinline { ret void } @@ -105,6 +106,7 @@ ret void } +; GCN-LABEL: {{^}}wombat: define weak amdgpu_kernel void @wombat(i32* %arg, i32* %arg2) { bb: call void @hoge() #0 Index: llvm/test/CodeGen/AMDGPU/llc-pipeline.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -138,12 +138,12 @@ ; GCN-O0-NEXT: Branch relaxation pass ; GCN-O0-NEXT: Register Usage Information Collector Pass ; GCN-O0-NEXT: Live DEBUG_VALUE analysis -; GCN-O0-NEXT: Function register usage analysis -; GCN-O0-NEXT: FunctionPass Manager -; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis -; GCN-O0-NEXT: Machine Optimization Remark Emitter -; GCN-O0-NEXT: AMDGPU Assembly Printer -; GCN-O0-NEXT: Free MachineFunction +; GCN-O0-NEXT: Function register usage analysis +; GCN-O0-NEXT: FunctionPass Manager +; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O0-NEXT: Machine Optimization Remark Emitter +; GCN-O0-NEXT: AMDGPU Assembly Printer +; GCN-O0-NEXT: Free MachineFunction ; GCN-O0-NEXT:Pass Arguments: -domtree ; GCN-O0-NEXT: FunctionPass Manager ; GCN-O0-NEXT: Dominator Tree Construction @@ -390,12 +390,12 @@ ; GCN-O1-NEXT: Branch relaxation pass ; GCN-O1-NEXT: Register Usage Information Collector Pass ; GCN-O1-NEXT: Live DEBUG_VALUE analysis -; GCN-O1-NEXT: Function register usage analysis -; GCN-O1-NEXT: FunctionPass Manager -; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis -; GCN-O1-NEXT: Machine Optimization Remark Emitter -; GCN-O1-NEXT: AMDGPU Assembly Printer -; GCN-O1-NEXT: Free MachineFunction +; GCN-O1-NEXT: Function register usage analysis +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O1-NEXT: Machine Optimization Remark Emitter +; GCN-O1-NEXT: AMDGPU Assembly Printer +; GCN-O1-NEXT: Free MachineFunction ; GCN-O1-NEXT:Pass Arguments: -domtree ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Dominator Tree Construction @@ -675,12 +675,12 @@ ; GCN-O1-OPTS-NEXT: Branch relaxation pass ; GCN-O1-OPTS-NEXT: Register Usage Information Collector Pass ; GCN-O1-OPTS-NEXT: Live DEBUG_VALUE analysis -; GCN-O1-OPTS-NEXT: Function register usage analysis -; GCN-O1-OPTS-NEXT: FunctionPass Manager -; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis -; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter -; GCN-O1-OPTS-NEXT: AMDGPU Assembly Printer -; GCN-O1-OPTS-NEXT: Free MachineFunction +; GCN-O1-OPTS-NEXT: Function register usage analysis +; GCN-O1-OPTS-NEXT: FunctionPass Manager +; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter +; GCN-O1-OPTS-NEXT: AMDGPU Assembly Printer +; GCN-O1-OPTS-NEXT: Free MachineFunction ; GCN-O1-OPTS-NEXT:Pass Arguments: -domtree ; GCN-O1-OPTS-NEXT: FunctionPass Manager ; GCN-O1-OPTS-NEXT: Dominator Tree Construction @@ -962,12 +962,12 @@ ; GCN-O2-NEXT: Branch relaxation pass ; GCN-O2-NEXT: Register Usage Information Collector Pass ; GCN-O2-NEXT: Live DEBUG_VALUE analysis -; GCN-O2-NEXT: Function register usage analysis -; GCN-O2-NEXT: FunctionPass Manager -; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis -; GCN-O2-NEXT: Machine Optimization Remark Emitter -; GCN-O2-NEXT: AMDGPU Assembly Printer -; GCN-O2-NEXT: Free MachineFunction +; GCN-O2-NEXT: Function register usage analysis +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O2-NEXT: Machine Optimization Remark Emitter +; GCN-O2-NEXT: AMDGPU Assembly Printer +; GCN-O2-NEXT: Free MachineFunction ; GCN-O2-NEXT:Pass Arguments: -domtree ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Dominator Tree Construction @@ -1262,12 +1262,12 @@ ; GCN-O3-NEXT: Branch relaxation pass ; GCN-O3-NEXT: Register Usage Information Collector Pass ; GCN-O3-NEXT: Live DEBUG_VALUE analysis -; GCN-O3-NEXT: Function register usage analysis -; GCN-O3-NEXT: FunctionPass Manager -; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis -; GCN-O3-NEXT: Machine Optimization Remark Emitter -; GCN-O3-NEXT: AMDGPU Assembly Printer -; GCN-O3-NEXT: Free MachineFunction +; GCN-O3-NEXT: Function register usage analysis +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O3-NEXT: Machine Optimization Remark Emitter +; GCN-O3-NEXT: AMDGPU Assembly Printer +; GCN-O3-NEXT: Free MachineFunction ; GCN-O3-NEXT:Pass Arguments: -domtree ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Dominator Tree Construction Index: llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll +++ llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll @@ -1,6 +1,9 @@ ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX908 %s ; RUN: not llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GCN,GFX900 %s +; GFX900: couldn't allocate input reg for constraint 'a' + + ; GCN-LABEL: {{^}}max_10_vgprs: ; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0 ; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1 @@ -65,8 +68,6 @@ ; GFX908: v_accvgpr_read_b32 [[V_REG]], [[A_REG]] ; GFX908-NOT: buffer_ -; GFX900: couldn't allocate input reg for constraint 'a' - ; GFX908: NumVgprs: 10 ; GFX908: ScratchSize: 0 ; GFX908: VGPRBlocks: 2 Index: llvm/test/CodeGen/AMDGPU/trap.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/trap.ll +++ llvm/test/CodeGen/AMDGPU/trap.ll @@ -14,6 +14,9 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; GCN-WARNING: warning: :0:0: in function hsa_debugtrap void (i32 addrspace(1)*): debugtrap handler not supported + + declare void @llvm.trap() #0 declare void @llvm.debugtrap() #1 @@ -54,7 +57,6 @@ ; NOMESA-TRAP: .long 47180 ; NOMESA-TRAP-NEXT: .long 144 -; GCN-WARNING: warning: :0:0: in function hsa_debugtrap void (i32 addrspace(1)*): debugtrap handler not supported ; GCN-LABEL: {{^}}hsa_debugtrap: ; HSA-TRAP: enable_trap_handler = 0 ; HSA-TRAP: s_trap 3