Skip to content

Commit 93df060

Browse files
author
Marek Olsak
committed Jul 27, 2015
AMDGPU: don't match vgpr loads for constant loads
Author: Dave Airlie <airlied@redhat.com>. In order to implement indirect sampler loads, we don't want to match on a VGPR load but on an SGPR one for constants, as we cannot feed VGPRs to the sampler, only SGPRs. This should be applicable for LLVM 3.7 as well. llvm-svn: 243294
1 parent c1c2b87 commit 93df060

File tree

3 files changed

+4
-19
lines changed

3 files changed

+4
-19
lines changed
 

‎llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2910,9 +2910,6 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
29102910
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
29112911
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
29122912
defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
2913-
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32, constant_load>;
2914-
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32, constant_load>;
2915-
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>;
29162913
} // End Predicates = [isSICI]
29172914

29182915
class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <

‎llvm/test/CodeGen/AMDGPU/gv-const-addrspace.ll

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@
88
@float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
99

1010
; FUNC-LABEL: {{^}}float:
11-
; FIXME: We should be using s_load_dword here.
12-
; SI: buffer_load_dword
13-
; VI: s_load_dword
11+
; GCN: s_load_dword
1412

1513
; EG-DAG: MOV {{\** *}}T2.X
1614
; EG-DAG: MOV {{\** *}}T3.X
@@ -31,9 +29,7 @@ entry:
3129

3230
; FUNC-LABEL: {{^}}i32:
3331

34-
; FIXME: We should be using s_load_dword here.
35-
; SI: buffer_load_dword
36-
; VI: s_load_dword
32+
; GCN: s_load_dword
3733

3834
; EG-DAG: MOV {{\** *}}T2.X
3935
; EG-DAG: MOV {{\** *}}T3.X
@@ -71,9 +67,7 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
7167
<1 x i32> <i32 4> ]
7268

7369
; FUNC-LABEL: {{^}}array_v1_gv_load:
74-
; FIXME: We should be using s_load_dword here.
75-
; SI: buffer_load_dword
76-
; VI: s_load_dword
70+
; GCN: s_load_dword
7771
define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
7872
%gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
7973
%load = load <1 x i32>, <1 x i32> addrspace(2)* %gep, align 4

‎llvm/test/CodeGen/AMDGPU/smrd.ll

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,7 @@ entry:
4343
; GCN-LABEL: {{^}}smrd3:
4444
; FIXME: There are too many copies here because we don't fold immediates
4545
; through REG_SEQUENCE
46-
; SI: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
47-
; SI: s_mov_b32 s[[SHI:[0-9]+]], 4
48-
; SI: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]]
49-
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]]
50-
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
51-
; FIXME: We should be able to use s_load_dword here
52-
; SI: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
46+
; SI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
5347
; TODO: Add VI checks
5448
; GCN: s_endpgm
5549
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {

0 commit comments

Comments
 (0)