@@ -149,3 +149,174 @@ latch:
149
149
return:
150
150
ret i32 %sum
151
151
}
152
+
153
+ declare {}* @llvm.invariant.start.p0i8 (i64 , i8* nocapture ) nounwind readonly
154
+ declare void @llvm.invariant.end.p0i8 ({}*, i64 , i8* nocapture ) nounwind
155
+ declare void @escaping.invariant.start ({}*) nounwind
156
+ ; invariant.start dominates the load, and in this scope, the
157
+ ; load is invariant. So, we can hoist the `addrld` load out of the loop.
158
+ define i32 @test_fence (i8* %addr , i32 %n , i8* %volatile ) {
159
+ ; CHECK-LABEL: @test_fence
160
+ ; CHECK-LABEL: entry
161
+ ; CHECK: invariant.start
162
+ ; CHECK: %addrld = load atomic i32, i32* %addr.i unordered, align 8
163
+ ; CHECK: br label %loop
164
+ entry:
165
+ %gep = getelementptr inbounds i8 , i8* %addr , i64 8
166
+ %addr.i = bitcast i8* %gep to i32 *
167
+ store atomic i32 5 , i32 * %addr.i unordered , align 8
168
+ fence release
169
+ %invst = call {}* @llvm.invariant.start.p0i8 (i64 4 , i8* %gep )
170
+ br label %loop
171
+
172
+ loop:
173
+ %indvar = phi i32 [ %indvar.next , %loop ], [ 0 , %entry ]
174
+ %sum = phi i32 [ %sum.next , %loop ], [ 0 , %entry ]
175
+ %volload = load atomic i8 , i8* %volatile unordered , align 8
176
+ fence acquire
177
+ %volchk = icmp eq i8 %volload , 0
178
+ %addrld = load atomic i32 , i32* %addr.i unordered , align 8
179
+ %sel = select i1 %volchk , i32 0 , i32 %addrld
180
+ %sum.next = add i32 %sel , %sum
181
+ %indvar.next = add i32 %indvar , 1
182
+ %cond = icmp slt i32 %indvar.next , %n
183
+ br i1 %cond , label %loop , label %loopexit
184
+
185
+ loopexit:
186
+ ret i32 %sum
187
+ }
188
+
189
+
190
+
191
+ ; Same as test above, but the load is no longer invariant (presence of
192
+ ; invariant.end). We cannot hoist the addrld out of loop.
193
+ define i32 @test_fence1 (i8* %addr , i32 %n , i8* %volatile ) {
194
+ ; CHECK-LABEL: @test_fence1
195
+ ; CHECK-LABEL: entry
196
+ ; CHECK: invariant.start
197
+ ; CHECK-NEXT: invariant.end
198
+ ; CHECK-NEXT: br label %loop
199
+ entry:
200
+ %gep = getelementptr inbounds i8 , i8* %addr , i64 8
201
+ %addr.i = bitcast i8* %gep to i32 *
202
+ store atomic i32 5 , i32 * %addr.i unordered , align 8
203
+ fence release
204
+ %invst = call {}* @llvm.invariant.start.p0i8 (i64 4 , i8* %gep )
205
+ call void @llvm.invariant.end.p0i8 ({}* %invst , i64 4 , i8* %gep )
206
+ br label %loop
207
+
208
+ loop:
209
+ %indvar = phi i32 [ %indvar.next , %loop ], [ 0 , %entry ]
210
+ %sum = phi i32 [ %sum.next , %loop ], [ 0 , %entry ]
211
+ %volload = load atomic i8 , i8* %volatile unordered , align 8
212
+ fence acquire
213
+ %volchk = icmp eq i8 %volload , 0
214
+ %addrld = load atomic i32 , i32* %addr.i unordered , align 8
215
+ %sel = select i1 %volchk , i32 0 , i32 %addrld
216
+ %sum.next = add i32 %sel , %sum
217
+ %indvar.next = add i32 %indvar , 1
218
+ %cond = icmp slt i32 %indvar.next , %n
219
+ br i1 %cond , label %loop , label %loopexit
220
+
221
+ loopexit:
222
+ ret i32 %sum
223
+ }
224
+
225
+ ; same as test above, but instead of invariant.end, we have the result of
226
+ ; invariant.start escaping through a call. We cannot hoist the load.
227
+ define i32 @test_fence2 (i8* %addr , i32 %n , i8* %volatile ) {
228
+ ; CHECK-LABEL: @test_fence2
229
+ ; CHECK-LABEL: entry
230
+ ; CHECK-NOT: load
231
+ ; CHECK: br label %loop
232
+ entry:
233
+ %gep = getelementptr inbounds i8 , i8* %addr , i64 8
234
+ %addr.i = bitcast i8* %gep to i32 *
235
+ store atomic i32 5 , i32 * %addr.i unordered , align 8
236
+ fence release
237
+ %invst = call {}* @llvm.invariant.start.p0i8 (i64 4 , i8* %gep )
238
+ call void @escaping.invariant.start ({}* %invst )
239
+ br label %loop
240
+
241
+ loop:
242
+ %indvar = phi i32 [ %indvar.next , %loop ], [ 0 , %entry ]
243
+ %sum = phi i32 [ %sum.next , %loop ], [ 0 , %entry ]
244
+ %volload = load atomic i8 , i8* %volatile unordered , align 8
245
+ fence acquire
246
+ %volchk = icmp eq i8 %volload , 0
247
+ %addrld = load atomic i32 , i32* %addr.i unordered , align 8
248
+ %sel = select i1 %volchk , i32 0 , i32 %addrld
249
+ %sum.next = add i32 %sel , %sum
250
+ %indvar.next = add i32 %indvar , 1
251
+ %cond = icmp slt i32 %indvar.next , %n
252
+ br i1 %cond , label %loop , label %loopexit
253
+
254
+ loopexit:
255
+ ret i32 %sum
256
+ }
257
+
258
+ ; FIXME: invariant.start dominates the load, and in this scope, the
259
+ ; load is invariant. So, we can hoist the `addrld` load out of the loop.
260
+ ; Consider the loadoperand addr.i bitcasted before being passed to
261
+ ; invariant.start
262
+ define i32 @test_fence3 (i32* %addr , i32 %n , i8* %volatile ) {
263
+ ; CHECK-LABEL: @test_fence3
264
+ ; CHECK-LABEL: entry
265
+ ; CHECK: invariant.start
266
+ ; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
267
+ ; CHECK: br label %loop
268
+ entry:
269
+ %addr.i = getelementptr inbounds i32 , i32* %addr , i64 8
270
+ %gep = bitcast i32* %addr.i to i8 *
271
+ store atomic i32 5 , i32 * %addr.i unordered , align 8
272
+ fence release
273
+ %invst = call {}* @llvm.invariant.start.p0i8 (i64 4 , i8* %gep )
274
+ br label %loop
275
+
276
+ loop:
277
+ %indvar = phi i32 [ %indvar.next , %loop ], [ 0 , %entry ]
278
+ %sum = phi i32 [ %sum.next , %loop ], [ 0 , %entry ]
279
+ %volload = load atomic i8 , i8* %volatile unordered , align 8
280
+ fence acquire
281
+ %volchk = icmp eq i8 %volload , 0
282
+ %addrld = load atomic i32 , i32* %addr.i unordered , align 8
283
+ %sel = select i1 %volchk , i32 0 , i32 %addrld
284
+ %sum.next = add i32 %sel , %sum
285
+ %indvar.next = add i32 %indvar , 1
286
+ %cond = icmp slt i32 %indvar.next , %n
287
+ br i1 %cond , label %loop , label %loopexit
288
+
289
+ loopexit:
290
+ ret i32 %sum
291
+ }
292
+
293
+ ; We should not hoist the addrld out of the loop.
294
+ define i32 @test_fence4 (i32* %addr , i32 %n , i8* %volatile ) {
295
+ ; CHECK-LABEL: @test_fence4
296
+ ; CHECK-LABEL: entry
297
+ ; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
298
+ ; CHECK: br label %loop
299
+ entry:
300
+ %addr.i = getelementptr inbounds i32 , i32* %addr , i64 8
301
+ %gep = bitcast i32* %addr.i to i8 *
302
+ br label %loop
303
+
304
+ loop:
305
+ %indvar = phi i32 [ %indvar.next , %loop ], [ 0 , %entry ]
306
+ %sum = phi i32 [ %sum.next , %loop ], [ 0 , %entry ]
307
+ store atomic i32 5 , i32 * %addr.i unordered , align 8
308
+ fence release
309
+ %invst = call {}* @llvm.invariant.start.p0i8 (i64 4 , i8* %gep )
310
+ %volload = load atomic i8 , i8* %volatile unordered , align 8
311
+ fence acquire
312
+ %volchk = icmp eq i8 %volload , 0
313
+ %addrld = load atomic i32 , i32* %addr.i unordered , align 8
314
+ %sel = select i1 %volchk , i32 0 , i32 %addrld
315
+ %sum.next = add i32 %sel , %sum
316
+ %indvar.next = add i32 %indvar , 1
317
+ %cond = icmp slt i32 %indvar.next , %n
318
+ br i1 %cond , label %loop , label %loopexit
319
+
320
+ loopexit:
321
+ ret i32 %sum
322
+ }
0 commit comments