diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -217,11 +217,16 @@ // VGPR registers foreach Index = 0-255 in { + // set a cost value for vgprs other than the argument registers (v0-v31). + // The ratio of index/allocation_granularity is taken as the cost value. + // Considered the allocation granularity as 4 here. + let CostPerUse=!if(!gt(Index, 31), !srl(Index, 2), 0) in { def VGPR#Index : SIReg <"v"#Index, Index>, DwarfRegNum<[!add(Index, 2560)]> { let HWEncoding{8} = 1; } + } } // AccVGPR registers diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll --- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll @@ -246,7 +246,7 @@ ; GFX908-DAG v_accvgpr_read_b32 ; GCN: NumVgprs: 256 -; GFX900: ScratchSize: 644 +; GFX900: ScratchSize: 708 ; GFX908-FIXME: ScratchSize: 0 ; GCN: VGPRBlocks: 63 ; GCN: NumVGPRsForWavesPerEU: 256 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll @@ -24,7 +24,7 @@ ; OFFREG is offset system SGPR ; GCN: buffer_store_dword {{v[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], s12 offset:{{[0-9]+}} ; 4-byte Folded Spill ; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], s12 offset:{{[0-9]+}} ; 4-byte Folded Reload -; GCN: NumVgprs: 256 +; GCN: NumVgprs: 255 ; GCN: ScratchSize: 1536 define amdgpu_vs void @main([9 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [17 x <4 x i32>] addrspace(4)* inreg %arg2, [34 x <8 x i32>] addrspace(4)* inreg %arg3, [16 x <4 x i32>] addrspace(4)* inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {