1
- ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2
- ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
1
+ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI %s
2
+ ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=CI %s
3
+ ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI --check-prefix=GCN-HSA %s
3
4
4
5
declare i32 @llvm.r600.read.tidig.x () #0
5
6
declare i32 @llvm.r600.read.tidig.y () #0
@@ -18,8 +19,10 @@ declare i32 @llvm.r600.read.tidig.y() #0
18
19
19
20
; Make sure we aren't using VGPRs for the srsrc operand of BUFFER_LOAD_*
20
21
; instructions
21
- ; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
22
- ; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
22
+ ; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
23
+ ; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
24
+ ; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
25
+ ; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
23
26
24
27
define void @mubuf (i32 addrspace (1 )* %out , i8 addrspace (1 )* %in ) #1 {
25
28
entry:
@@ -50,8 +53,10 @@ done: ; preds = %loop
50
53
; Test moving an SMRD instruction to the VALU
51
54
52
55
; GCN-LABEL: {{^}}smrd_valu:
56
+ ; FIXME: We should be using flat load for HSA.
53
57
; GCN: buffer_load_dword [[OUT:v[0-9]+]]
54
- ; GCN: buffer_store_dword [[OUT]]
58
+ ; GCN-NOHSA: buffer_store_dword [[OUT]]
59
+ ; GCN-HSA: flat_store_dword [[OUT]]
55
60
define void @smrd_valu (i32 addrspace (2 )* addrspace (1 )* %in , i32 %a , i32 %b , i32 addrspace (1 )* %out ) #1 {
56
61
entry:
57
62
%tmp = icmp ne i32 %a , 0
@@ -77,8 +82,9 @@ endif: ; preds = %else, %if
77
82
; Test moving an SMRD with an immediate offset to the VALU
78
83
79
84
; GCN-LABEL: {{^}}smrd_valu2:
80
- ; GCN-NOT: v_add
81
- ; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}}
85
+ ; GCN-NOHSA-NOT: v_add
86
+ ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}}
87
+ ; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
82
88
define void @smrd_valu2 (i32 addrspace (1 )* %out , [8 x i32 ] addrspace (2 )* %in ) #1 {
83
89
entry:
84
90
%tmp = call i32 @llvm.r600.read.tidig.x () #0
@@ -91,12 +97,14 @@ entry:
91
97
92
98
; Use a big offset that will use the SMRD literal offset on CI
93
99
; GCN-LABEL: {{^}}smrd_valu_ci_offset:
94
- ; GCN-NOT: v_add
95
- ; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4e20{{$}}
96
- ; GCN-NOT: v_add
97
- ; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
98
- ; GCN: v_add_i32_e32
99
- ; GCN: buffer_store_dword
100
+ ; GCN-NOHSA-NOT: v_add
101
+ ; GCN-NOHSA: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4e20{{$}}
102
+ ; GCN-NOHSA-NOT: v_add
103
+ ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
104
+ ; GCN-NOHSA: v_add_i32_e32
105
+ ; GCN-NOHSA: buffer_store_dword
106
+ ; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
107
+ ; GCN-HSA: flat_store_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
100
108
define void @smrd_valu_ci_offset (i32 addrspace (1 )* %out , i32 addrspace (2 )* %in , i32 %c ) #1 {
101
109
entry:
102
110
%tmp = call i32 @llvm.r600.read.tidig.x () #0
@@ -109,13 +117,14 @@ entry:
109
117
}
110
118
111
119
; GCN-LABEL: {{^}}smrd_valu_ci_offset_x2:
112
- ; GCN-NOT: v_add
113
- ; GCN: s_mov_b32 [[OFFSET:s[0-9]+]], 0x9c40{{$}}
114
- ; GCN-NOT: v_add
115
- ; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
116
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
117
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
118
- ; GCN: buffer_store_dwordx2
120
+ ; GCN-NOHSA-NOT: v_add
121
+ ; GCN-NOHSA: s_mov_b32 [[OFFSET:s[0-9]+]], 0x9c40{{$}}
122
+ ; GCN-NOHSA-NOT: v_add
123
+ ; GCN-NOHSA: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
124
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
125
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
126
+ ; GCN-NOHSA: buffer_store_dwordx2
127
+ ; GCN-HSA: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
119
128
define void @smrd_valu_ci_offset_x2 (i64 addrspace (1 )* %out , i64 addrspace (2 )* %in , i64 %c ) #1 {
120
129
entry:
121
130
%tmp = call i32 @llvm.r600.read.tidig.x () #0
@@ -128,15 +137,16 @@ entry:
128
137
}
129
138
130
139
; GCN-LABEL: {{^}}smrd_valu_ci_offset_x4:
131
- ; GCN-NOT: v_add
132
- ; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4d20{{$}}
133
- ; GCN-NOT: v_add
134
- ; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
135
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
136
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
137
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
138
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
139
- ; GCN: buffer_store_dwordx4
140
+ ; GCN-NOHSA-NOT: v_add
141
+ ; GCN-NOHSA: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4d20{{$}}
142
+ ; GCN-NOHSA-NOT: v_add
143
+ ; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
144
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
145
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
146
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
147
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
148
+ ; GCN-NOHSA: buffer_store_dwordx4
149
+ ; GCN-HSA: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
140
150
define void @smrd_valu_ci_offset_x4 (<4 x i32 > addrspace (1 )* %out , <4 x i32 > addrspace (2 )* %in , <4 x i32 > %c ) #1 {
141
151
entry:
142
152
%tmp = call i32 @llvm.r600.read.tidig.x () #0
@@ -152,25 +162,27 @@ entry:
152
162
; CI.
153
163
154
164
; GCN-LABEL: {{^}}smrd_valu_ci_offset_x8:
155
- ; GCN-NOT: v_add
156
- ; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}}
157
- ; GCN-NOT: v_add
158
- ; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
159
- ; GCN-NOT: v_add
160
- ; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}}
161
- ; GCN-NOT: v_add
162
- ; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
163
-
164
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
165
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
166
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
167
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
168
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
169
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
170
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
171
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
172
- ; GCN: buffer_store_dwordx4
173
- ; GCN: buffer_store_dwordx4
165
+ ; GCN-NOHSA-NOT: v_add
166
+ ; GCN-NOHSA: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}}
167
+ ; GCN-NOHSA-NOT: v_add
168
+ ; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
169
+ ; GCN-NOHSA-NOT: v_add
170
+ ; GCN-NOHSA: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}}
171
+ ; GCN-NOHSA-NOT: v_add
172
+ ; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
173
+
174
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
175
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
176
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
177
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
178
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
179
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
180
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
181
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
182
+ ; GCN-NOHSA: buffer_store_dwordx4
183
+ ; GCN-NOHSA: buffer_store_dwordx4
184
+ ; GCN-HSA: flat_load_dwordx4
185
+ ; GCN-HSA: flat_load_dwordx4
174
186
define void @smrd_valu_ci_offset_x8 (<8 x i32 > addrspace (1 )* %out , <8 x i32 > addrspace (2 )* %in , <8 x i32 > %c ) #1 {
175
187
entry:
176
188
%tmp = call i32 @llvm.r600.read.tidig.x () #0
@@ -184,35 +196,40 @@ entry:
184
196
185
197
; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16:
186
198
187
- ; GCN-NOT: v_add
188
- ; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
189
- ; GCN-NOT: v_add
190
- ; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
191
- ; GCN-NOT: v_add
192
- ; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
193
- ; GCN-NOT: v_add
194
- ; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
195
- ; GCN-NOT: v_add
196
- ; GCN: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}}
197
- ; GCN-NOT: v_add
198
- ; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}}
199
- ; GCN-NOT: v_add
200
- ; GCN: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}}
201
- ; GCN-NOT: v_add
202
- ; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}}
203
-
204
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
205
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
206
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
207
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
208
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
209
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
210
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
211
- ; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
212
- ; GCN: buffer_store_dwordx4
213
- ; GCN: buffer_store_dwordx4
214
- ; GCN: buffer_store_dwordx4
215
- ; GCN: buffer_store_dwordx4
199
+ ; GCN-NOHSA-NOT: v_add
200
+ ; GCN-NOHSA: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
201
+ ; GCN-NOHSA-NOT: v_add
202
+ ; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
203
+ ; GCN-NOHSA-NOT: v_add
204
+ ; GCN-NOHSA: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
205
+ ; GCN-NOHSA-NOT: v_add
206
+ ; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
207
+ ; GCN-NOHSA-NOT: v_add
208
+ ; GCN-NOHSA: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}}
209
+ ; GCN-NOHSA-NOT: v_add
210
+ ; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}}
211
+ ; GCN-NOHSA-NOT: v_add
212
+ ; GCN-NOHSA: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}}
213
+ ; GCN-NOHSA-NOT: v_add
214
+ ; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}}
215
+
216
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
217
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
218
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
219
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
220
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
221
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
222
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
223
+ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
224
+ ; GCN-NOHSA: buffer_store_dwordx4
225
+ ; GCN-NOHSA: buffer_store_dwordx4
226
+ ; GCN-NOHSA: buffer_store_dwordx4
227
+ ; GCN-NOHSA: buffer_store_dwordx4
228
+
229
+ ; GCN-HSA: flat_load_dwordx4
230
+ ; GCN-HSA: flat_load_dwordx4
231
+ ; GCN-HSA: flat_load_dwordx4
232
+ ; GCN-HSA: flat_load_dwordx4
216
233
217
234
; GCN: s_endpgm
218
235
define void @smrd_valu_ci_offset_x16 (<16 x i32 > addrspace (1 )* %out , <16 x i32 > addrspace (2 )* %in , <16 x i32 > %c ) #1 {
@@ -227,9 +244,11 @@ entry:
227
244
}
228
245
229
246
; GCN-LABEL: {{^}}smrd_valu2_salu_user:
230
- ; GCN: buffer_load_dword [[MOVED:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
247
+ ; GCN-NOHSA: buffer_load_dword [[MOVED:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
248
+ ; GCN-HSA: flat_load_dword [[MOVED:v[0-9]+]], v[{{[0-9]+:[0-9]+}}]
231
249
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]]
232
- ; GCN: buffer_store_dword [[ADD]]
250
+ ; GCN-NOHSA: buffer_store_dword [[ADD]]
251
+ ; GCN-HSA: flat_store_dword [[ADD]]
233
252
define void @smrd_valu2_salu_user (i32 addrspace (1 )* %out , [8 x i32 ] addrspace (2 )* %in , i32 %a ) #1 {
234
253
entry:
235
254
%tmp = call i32 @llvm.r600.read.tidig.x () #0
@@ -242,7 +261,8 @@ entry:
242
261
}
243
262
244
263
; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset:
245
- ; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}}
264
+ ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}}
265
+ ; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
246
266
define void @smrd_valu2_max_smrd_offset (i32 addrspace (1 )* %out , [1024 x i32 ] addrspace (2 )* %in ) #1 {
247
267
entry:
248
268
%tmp = call i32 @llvm.r600.read.tidig.x () #0
@@ -254,8 +274,9 @@ entry:
254
274
}
255
275
256
276
; GCN-LABEL: {{^}}smrd_valu2_mubuf_offset:
257
- ; GCN-NOT: v_add
258
- ; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}}
277
+ ; GCN-NOHSA-NOT: v_add
278
+ ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}}
279
+ ; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
259
280
define void @smrd_valu2_mubuf_offset (i32 addrspace (1 )* %out , [1024 x i32 ] addrspace (2 )* %in ) #1 {
260
281
entry:
261
282
%tmp = call i32 @llvm.r600.read.tidig.x () #0
@@ -267,8 +288,10 @@ entry:
267
288
}
268
289
269
290
; GCN-LABEL: {{^}}s_load_imm_v8i32:
270
- ; GCN: buffer_load_dwordx4
271
- ; GCN: buffer_load_dwordx4
291
+ ; GCN-NOHSA: buffer_load_dwordx4
292
+ ; GCN-NOHSA: buffer_load_dwordx4
293
+ ; GCN-HSA: flat_load_dwordx4
294
+ ; GCN-HSA: flat_load_dwordx4
272
295
define void @s_load_imm_v8i32 (<8 x i32 > addrspace (1 )* %out , i32 addrspace (2 )* nocapture readonly %in ) #1 {
273
296
entry:
274
297
%tmp0 = tail call i32 @llvm.r600.read.tidig.x ()
@@ -280,16 +303,18 @@ entry:
280
303
}
281
304
282
305
; GCN-LABEL: {{^}}s_load_imm_v8i32_salu_user:
283
- ; GCN: buffer_load_dwordx4
284
- ; GCN: buffer_load_dwordx4
285
- ; GCN: v_add_i32_e32
286
- ; GCN: v_add_i32_e32
287
- ; GCN: v_add_i32_e32
288
- ; GCN: v_add_i32_e32
289
- ; GCN: v_add_i32_e32
290
- ; GCN: v_add_i32_e32
291
- ; GCN: v_add_i32_e32
292
- ; GCN: buffer_store_dword
306
+ ; GCN-NOHSA: buffer_load_dwordx4
307
+ ; GCN-NOHSA: buffer_load_dwordx4
308
+ ; GCN-NOHSA: v_add_i32_e32
309
+ ; GCN-NOHSA: v_add_i32_e32
310
+ ; GCN-NOHSA: v_add_i32_e32
311
+ ; GCN-NOHSA: v_add_i32_e32
312
+ ; GCN-NOHSA: v_add_i32_e32
313
+ ; GCN-NOHSA: v_add_i32_e32
314
+ ; GCN-NOHSA: v_add_i32_e32
315
+ ; GCN-NOHSA: buffer_store_dword
316
+ ; GCN-HSA: flat_load_dwordx4
317
+ ; GCN-HSA: flat_load_dwordx4
293
318
define void @s_load_imm_v8i32_salu_user (i32 addrspace (1 )* %out , i32 addrspace (2 )* nocapture readonly %in ) #1 {
294
319
entry:
295
320
%tmp0 = tail call i32 @llvm.r600.read.tidig.x ()
@@ -319,10 +344,14 @@ entry:
319
344
}
320
345
321
346
; GCN-LABEL: {{^}}s_load_imm_v16i32:
322
- ; GCN: buffer_load_dwordx4
323
- ; GCN: buffer_load_dwordx4
324
- ; GCN: buffer_load_dwordx4
325
- ; GCN: buffer_load_dwordx4
347
+ ; GCN-NOHSA: buffer_load_dwordx4
348
+ ; GCN-NOHSA: buffer_load_dwordx4
349
+ ; GCN-NOHSA: buffer_load_dwordx4
350
+ ; GCN-NOHSA: buffer_load_dwordx4
351
+ ; GCN-HSA: flat_load_dwordx4
352
+ ; GCN-HSA: flat_load_dwordx4
353
+ ; GCN-HSA: flat_load_dwordx4
354
+ ; GCN-HSA: flat_load_dwordx4
326
355
define void @s_load_imm_v16i32 (<16 x i32 > addrspace (1 )* %out , i32 addrspace (2 )* nocapture readonly %in ) #1 {
327
356
entry:
328
357
%tmp0 = tail call i32 @llvm.r600.read.tidig.x () #1
@@ -334,26 +363,30 @@ entry:
334
363
}
335
364
336
365
; GCN-LABEL: {{^}}s_load_imm_v16i32_salu_user:
337
- ; GCN: buffer_load_dwordx4
338
- ; GCN: buffer_load_dwordx4
339
- ; GCN: buffer_load_dwordx4
340
- ; GCN: buffer_load_dwordx4
341
- ; GCN: v_add_i32_e32
342
- ; GCN: v_add_i32_e32
343
- ; GCN: v_add_i32_e32
344
- ; GCN: v_add_i32_e32
345
- ; GCN: v_add_i32_e32
346
- ; GCN: v_add_i32_e32
347
- ; GCN: v_add_i32_e32
348
- ; GCN: v_add_i32_e32
349
- ; GCN: v_add_i32_e32
350
- ; GCN: v_add_i32_e32
351
- ; GCN: v_add_i32_e32
352
- ; GCN: v_add_i32_e32
353
- ; GCN: v_add_i32_e32
354
- ; GCN: v_add_i32_e32
355
- ; GCN: v_add_i32_e32
356
- ; GCN: buffer_store_dword
366
+ ; GCN-NOHSA: buffer_load_dwordx4
367
+ ; GCN-NOHSA: buffer_load_dwordx4
368
+ ; GCN-NOHSA: buffer_load_dwordx4
369
+ ; GCN-NOHSA: buffer_load_dwordx4
370
+ ; GCN-NOHSA: v_add_i32_e32
371
+ ; GCN-NOHSA: v_add_i32_e32
372
+ ; GCN-NOHSA: v_add_i32_e32
373
+ ; GCN-NOHSA: v_add_i32_e32
374
+ ; GCN-NOHSA: v_add_i32_e32
375
+ ; GCN-NOHSA: v_add_i32_e32
376
+ ; GCN-NOHSA: v_add_i32_e32
377
+ ; GCN-NOHSA: v_add_i32_e32
378
+ ; GCN-NOHSA: v_add_i32_e32
379
+ ; GCN-NOHSA: v_add_i32_e32
380
+ ; GCN-NOHSA: v_add_i32_e32
381
+ ; GCN-NOHSA: v_add_i32_e32
382
+ ; GCN-NOHSA: v_add_i32_e32
383
+ ; GCN-NOHSA: v_add_i32_e32
384
+ ; GCN-NOHSA: v_add_i32_e32
385
+ ; GCN-NOHSA: buffer_store_dword
386
+ ; GCN-HSA: flat_load_dwordx4
387
+ ; GCN-HSA: flat_load_dwordx4
388
+ ; GCN-HSA: flat_load_dwordx4
389
+ ; GCN-HSA: flat_load_dwordx4
357
390
define void @s_load_imm_v16i32_salu_user (i32 addrspace (1 )* %out , i32 addrspace (2 )* nocapture readonly %in ) #1 {
358
391
entry:
359
392
%tmp0 = tail call i32 @llvm.r600.read.tidig.x () #1
0 commit comments