Skip to content

Commit c72ece6

Browse files
committed May 16, 2018
AMDGPU : Recalculate SGPRs when trap handler is supported
Differential Revision: https://reviews.llvm.org/D29911
llvm-svn: 332523
1 parent 1f5eb86 commit c72ece6

File tree

3 files changed

+81
-6
lines changed

3 files changed

+81
-6
lines changed
 

‎llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

+9-5
Original file line number | Diff line number | Diff line change
@@ -358,9 +358,11 @@ unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
358358

359359
if (WavesPerEU >= getMaxWavesPerEU(Features))
360360
return 0;
361-
unsigned MinNumSGPRs =
362-
alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
363-
getSGPRAllocGranule(Features)) + 1;
361+
362+
unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
363+
if (Features.test(FeatureTrapHandler))
364+
MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
365+
MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
364366
return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
365367
}
366368

@@ -369,11 +371,13 @@ unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
369371
assert(WavesPerEU != 0);
370372

371373
IsaVersion Version = getIsaVersion(Features);
372-
unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
373-
getSGPRAllocGranule(Features));
374374
unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
375375
if (Version.Major >= 8 && !Addressable)
376376
AddressableNumSGPRs = 112;
377+
unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
378+
if (Features.test(FeatureTrapHandler))
379+
MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
380+
MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
377381
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
378382
}
379383

‎llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

+2-1
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,8 @@ namespace IsaInfo {
4242
enum {
4343
// The closed Vulkan driver sets 96, which limits the wave count to 8 but
4444
// doesn't spill SGPRs as much as when 80 is set.
45-
FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
45+
FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
46+
TRAP_NUM_SGPRS = 16
4647
};
4748

4849
/// Instruction set architecture version.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,70 @@
1+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+trap-handler < %s | FileCheck %s --check-prefixes=GCN,TRAP-HANDLER-ENABLE
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=-trap-handler < %s | FileCheck %s --check-prefixes=GCN,TRAP-HANDLER-DISABLE
3+
4+
; GCN-LABEL: {{^}}amdhsa_trap_num_sgprs
5+
; TRAP-HANDLER-ENABLE: NumSgprs: 60
6+
; TRAP-HANDLER-DISABLE: NumSgprs: 76
7+
define amdgpu_kernel void @amdhsa_trap_num_sgprs(
8+
i32 addrspace(1)* %out0, i32 %in0,
9+
i32 addrspace(1)* %out1, i32 %in1,
10+
i32 addrspace(1)* %out2, i32 %in2,
11+
i32 addrspace(1)* %out3, i32 %in3,
12+
i32 addrspace(1)* %out4, i32 %in4,
13+
i32 addrspace(1)* %out5, i32 %in5,
14+
i32 addrspace(1)* %out6, i32 %in6,
15+
i32 addrspace(1)* %out7, i32 %in7,
16+
i32 addrspace(1)* %out8, i32 %in8,
17+
i32 addrspace(1)* %out9, i32 %in9,
18+
i32 addrspace(1)* %out10, i32 %in10,
19+
i32 addrspace(1)* %out11, i32 %in11,
20+
i32 addrspace(1)* %out12, i32 %in12,
21+
i32 addrspace(1)* %out13, i32 %in13,
22+
i32 addrspace(1)* %out14, i32 %in14,
23+
i32 addrspace(1)* %out15, i32 %in15,
24+
i32 addrspace(1)* %out16, i32 %in16,
25+
i32 addrspace(1)* %out17, i32 %in17,
26+
i32 addrspace(1)* %out18, i32 %in18,
27+
i32 addrspace(1)* %out19, i32 %in19,
28+
i32 addrspace(1)* %out20, i32 %in20,
29+
i32 addrspace(1)* %out21, i32 %in21,
30+
i32 addrspace(1)* %out22, i32 %in22,
31+
i32 addrspace(1)* %out23, i32 %in23,
32+
i32 addrspace(1)* %out24, i32 %in24,
33+
i32 addrspace(1)* %out25, i32 %in25,
34+
i32 addrspace(1)* %out26, i32 %in26,
35+
i32 addrspace(1)* %out27, i32 %in27,
36+
i32 addrspace(1)* %out28, i32 %in28,
37+
i32 addrspace(1)* %out29, i32 %in29) {
38+
entry:
39+
store i32 %in0, i32 addrspace(1)* %out0
40+
store i32 %in1, i32 addrspace(1)* %out1
41+
store i32 %in2, i32 addrspace(1)* %out2
42+
store i32 %in3, i32 addrspace(1)* %out3
43+
store i32 %in4, i32 addrspace(1)* %out4
44+
store i32 %in5, i32 addrspace(1)* %out5
45+
store i32 %in6, i32 addrspace(1)* %out6
46+
store i32 %in7, i32 addrspace(1)* %out7
47+
store i32 %in8, i32 addrspace(1)* %out8
48+
store i32 %in9, i32 addrspace(1)* %out9
49+
store i32 %in10, i32 addrspace(1)* %out10
50+
store i32 %in11, i32 addrspace(1)* %out11
51+
store i32 %in12, i32 addrspace(1)* %out12
52+
store i32 %in13, i32 addrspace(1)* %out13
53+
store i32 %in14, i32 addrspace(1)* %out14
54+
store i32 %in15, i32 addrspace(1)* %out15
55+
store i32 %in16, i32 addrspace(1)* %out16
56+
store i32 %in17, i32 addrspace(1)* %out17
57+
store i32 %in18, i32 addrspace(1)* %out18
58+
store i32 %in19, i32 addrspace(1)* %out19
59+
store i32 %in20, i32 addrspace(1)* %out20
60+
store i32 %in21, i32 addrspace(1)* %out21
61+
store i32 %in22, i32 addrspace(1)* %out22
62+
store i32 %in23, i32 addrspace(1)* %out23
63+
store i32 %in24, i32 addrspace(1)* %out24
64+
store i32 %in25, i32 addrspace(1)* %out25
65+
store i32 %in26, i32 addrspace(1)* %out26
66+
store i32 %in27, i32 addrspace(1)* %out27
67+
store i32 %in28, i32 addrspace(1)* %out28
68+
store i32 %in29, i32 addrspace(1)* %out29
69+
ret void
70+
}

0 commit comments

Comments
 (0)
Please sign in to comment.