@@ -117,3 +117,270 @@ entry:
117
117
; CHECK: store <4 x i32>
118
118
; CHECK: ret
119
119
}
120
+
121
; Scalar ctlz intrinsic on i32 (value operand plus an i1 flag operand).
;
; @vec_ctlz_i32: positive test. Loads elements 0..3 of %a and %b, adds each
; pair, calls @llvm.ctlz.i32 on every sum with the SAME i1 flag (true), and
; stores the four results to elements 0..3 of %c. The trailing unnamed i1
; parameter is not referenced in the body.
; The CHECK lines expect two <4 x i32> loads, a single call to
; @llvm.ctlz.v4i32, and one <4 x i32> store.
; NOTE(review): the RUN line is outside this view; presumably this runs under
; the SLP vectorizer -- confirm against the top of the file.
+ declare i32 @llvm.ctlz.i32 (i32 ,i1 ) nounwind readnone
122
+
123
+ define void @vec_ctlz_i32 (i32* %a , i32* %b , i32* %c , i1 ) {
124
+ entry:
125
+ %i0 = load i32* %a , align 4
126
+ %i1 = load i32* %b , align 4
127
+ %add1 = add i32 %i0 , %i1
128
+ %call1 = tail call i32 @llvm.ctlz.i32 (i32 %add1 ,i1 true ) nounwind readnone
129
+
130
+ %arrayidx2 = getelementptr inbounds i32* %a , i32 1
131
+ %i2 = load i32* %arrayidx2 , align 4
132
+ %arrayidx3 = getelementptr inbounds i32* %b , i32 1
133
+ %i3 = load i32* %arrayidx3 , align 4
134
+ %add2 = add i32 %i2 , %i3
135
+ %call2 = tail call i32 @llvm.ctlz.i32 (i32 %add2 ,i1 true ) nounwind readnone
136
+
137
+ %arrayidx4 = getelementptr inbounds i32* %a , i32 2
138
+ %i4 = load i32* %arrayidx4 , align 4
139
+ %arrayidx5 = getelementptr inbounds i32* %b , i32 2
140
+ %i5 = load i32* %arrayidx5 , align 4
141
+ %add3 = add i32 %i4 , %i5
142
+ %call3 = tail call i32 @llvm.ctlz.i32 (i32 %add3 ,i1 true ) nounwind readnone
143
+
144
+ %arrayidx6 = getelementptr inbounds i32* %a , i32 3
145
+ %i6 = load i32* %arrayidx6 , align 4
146
+ %arrayidx7 = getelementptr inbounds i32* %b , i32 3
147
+ %i7 = load i32* %arrayidx7 , align 4
148
+ %add4 = add i32 %i6 , %i7
149
+ %call4 = tail call i32 @llvm.ctlz.i32 (i32 %add4 ,i1 true ) nounwind readnone
150
+
151
+ store i32 %call1 , i32* %c , align 4
152
+ %arrayidx8 = getelementptr inbounds i32* %c , i32 1
153
+ store i32 %call2 , i32* %arrayidx8 , align 4
154
+ %arrayidx9 = getelementptr inbounds i32* %c , i32 2
155
+ store i32 %call3 , i32* %arrayidx9 , align 4
156
+ %arrayidx10 = getelementptr inbounds i32* %c , i32 3
157
+ store i32 %call4 , i32* %arrayidx10 , align 4
158
+ ret void
159
+
160
+ ; CHECK-LABEL: @vec_ctlz_i32(
161
+ ; CHECK: load <4 x i32>
162
+ ; CHECK: load <4 x i32>
163
+ ; CHECK: call <4 x i32> @llvm.ctlz.v4i32
164
+ ; CHECK: store <4 x i32>
165
+ ; CHECK: ret
166
+ }
167
+
168
; @vec_ctlz_i32_neg: negative test. Same load/add/ctlz/store shape over
; elements 0..3 of %a, %b and %c as the positive ctlz test, except that the
; i1 flag passed to @llvm.ctlz.i32 alternates: true for %call1/%call3 and
; false for %call2/%call4. The trailing unnamed i1 parameter is not referenced.
; The CHECK-NOT line requires that no call to @llvm.ctlz.v4i32 appears after
; the function's label.
+ define void @vec_ctlz_i32_neg (i32* %a , i32* %b , i32* %c , i1 ) {
169
+ entry:
170
+ %i0 = load i32* %a , align 4
171
+ %i1 = load i32* %b , align 4
172
+ %add1 = add i32 %i0 , %i1
173
+ %call1 = tail call i32 @llvm.ctlz.i32 (i32 %add1 ,i1 true ) nounwind readnone
174
+
175
+ %arrayidx2 = getelementptr inbounds i32* %a , i32 1
176
+ %i2 = load i32* %arrayidx2 , align 4
177
+ %arrayidx3 = getelementptr inbounds i32* %b , i32 1
178
+ %i3 = load i32* %arrayidx3 , align 4
179
+ %add2 = add i32 %i2 , %i3
180
+ %call2 = tail call i32 @llvm.ctlz.i32 (i32 %add2 ,i1 false ) nounwind readnone ; flag is false here, unlike %call1/%call3
181
+
182
+ %arrayidx4 = getelementptr inbounds i32* %a , i32 2
183
+ %i4 = load i32* %arrayidx4 , align 4
184
+ %arrayidx5 = getelementptr inbounds i32* %b , i32 2
185
+ %i5 = load i32* %arrayidx5 , align 4
186
+ %add3 = add i32 %i4 , %i5
187
+ %call3 = tail call i32 @llvm.ctlz.i32 (i32 %add3 ,i1 true ) nounwind readnone
188
+
189
+ %arrayidx6 = getelementptr inbounds i32* %a , i32 3
190
+ %i6 = load i32* %arrayidx6 , align 4
191
+ %arrayidx7 = getelementptr inbounds i32* %b , i32 3
192
+ %i7 = load i32* %arrayidx7 , align 4
193
+ %add4 = add i32 %i6 , %i7
194
+ %call4 = tail call i32 @llvm.ctlz.i32 (i32 %add4 ,i1 false ) nounwind readnone ; flag is false here, unlike %call1/%call3
195
+
196
+ store i32 %call1 , i32* %c , align 4
197
+ %arrayidx8 = getelementptr inbounds i32* %c , i32 1
198
+ store i32 %call2 , i32* %arrayidx8 , align 4
199
+ %arrayidx9 = getelementptr inbounds i32* %c , i32 2
200
+ store i32 %call3 , i32* %arrayidx9 , align 4
201
+ %arrayidx10 = getelementptr inbounds i32* %c , i32 3
202
+ store i32 %call4 , i32* %arrayidx10 , align 4
203
+ ret void
204
+
205
+ ; CHECK-LABEL: @vec_ctlz_i32_neg(
206
+ ; CHECK-NOT: call <4 x i32> @llvm.ctlz.v4i32
207
+
208
+ }
209
+
210
+
211
; Scalar cttz intrinsic on i32 (value operand plus an i1 flag operand).
;
; @vec_cttz_i32: positive test. Loads elements 0..3 of %a and %b, adds each
; pair, calls @llvm.cttz.i32 on every sum with the SAME i1 flag (true), and
; stores the four results to elements 0..3 of %c. The trailing unnamed i1
; parameter is not referenced in the body.
; The CHECK lines expect two <4 x i32> loads, a single call to
; @llvm.cttz.v4i32, and one <4 x i32> store.
+ declare i32 @llvm.cttz.i32 (i32 ,i1 ) nounwind readnone
212
+
213
+ define void @vec_cttz_i32 (i32* %a , i32* %b , i32* %c , i1 ) {
214
+ entry:
215
+ %i0 = load i32* %a , align 4
216
+ %i1 = load i32* %b , align 4
217
+ %add1 = add i32 %i0 , %i1
218
+ %call1 = tail call i32 @llvm.cttz.i32 (i32 %add1 ,i1 true ) nounwind readnone
219
+
220
+ %arrayidx2 = getelementptr inbounds i32* %a , i32 1
221
+ %i2 = load i32* %arrayidx2 , align 4
222
+ %arrayidx3 = getelementptr inbounds i32* %b , i32 1
223
+ %i3 = load i32* %arrayidx3 , align 4
224
+ %add2 = add i32 %i2 , %i3
225
+ %call2 = tail call i32 @llvm.cttz.i32 (i32 %add2 ,i1 true ) nounwind readnone
226
+
227
+ %arrayidx4 = getelementptr inbounds i32* %a , i32 2
228
+ %i4 = load i32* %arrayidx4 , align 4
229
+ %arrayidx5 = getelementptr inbounds i32* %b , i32 2
230
+ %i5 = load i32* %arrayidx5 , align 4
231
+ %add3 = add i32 %i4 , %i5
232
+ %call3 = tail call i32 @llvm.cttz.i32 (i32 %add3 ,i1 true ) nounwind readnone
233
+
234
+ %arrayidx6 = getelementptr inbounds i32* %a , i32 3
235
+ %i6 = load i32* %arrayidx6 , align 4
236
+ %arrayidx7 = getelementptr inbounds i32* %b , i32 3
237
+ %i7 = load i32* %arrayidx7 , align 4
238
+ %add4 = add i32 %i6 , %i7
239
+ %call4 = tail call i32 @llvm.cttz.i32 (i32 %add4 ,i1 true ) nounwind readnone
240
+
241
+ store i32 %call1 , i32* %c , align 4
242
+ %arrayidx8 = getelementptr inbounds i32* %c , i32 1
243
+ store i32 %call2 , i32* %arrayidx8 , align 4
244
+ %arrayidx9 = getelementptr inbounds i32* %c , i32 2
245
+ store i32 %call3 , i32* %arrayidx9 , align 4
246
+ %arrayidx10 = getelementptr inbounds i32* %c , i32 3
247
+ store i32 %call4 , i32* %arrayidx10 , align 4
248
+ ret void
249
+
250
+ ; CHECK-LABEL: @vec_cttz_i32(
251
+ ; CHECK: load <4 x i32>
252
+ ; CHECK: load <4 x i32>
253
+ ; CHECK: call <4 x i32> @llvm.cttz.v4i32
254
+ ; CHECK: store <4 x i32>
255
+ ; CHECK: ret
256
+ }
257
+
258
; @vec_cttz_i32_neg: negative test. Same load/add/cttz/store shape over
; elements 0..3 of %a, %b and %c as the positive cttz test, except that the
; i1 flag passed to @llvm.cttz.i32 alternates: true for %call1/%call3 and
; false for %call2/%call4. The trailing unnamed i1 parameter is not referenced.
; The CHECK-NOT line requires that no call to @llvm.cttz.v4i32 appears after
; the function's label.
+ define void @vec_cttz_i32_neg (i32* %a , i32* %b , i32* %c , i1 ) {
259
+ entry:
260
+ %i0 = load i32* %a , align 4
261
+ %i1 = load i32* %b , align 4
262
+ %add1 = add i32 %i0 , %i1
263
+ %call1 = tail call i32 @llvm.cttz.i32 (i32 %add1 ,i1 true ) nounwind readnone
264
+
265
+ %arrayidx2 = getelementptr inbounds i32* %a , i32 1
266
+ %i2 = load i32* %arrayidx2 , align 4
267
+ %arrayidx3 = getelementptr inbounds i32* %b , i32 1
268
+ %i3 = load i32* %arrayidx3 , align 4
269
+ %add2 = add i32 %i2 , %i3
270
+ %call2 = tail call i32 @llvm.cttz.i32 (i32 %add2 ,i1 false ) nounwind readnone ; flag is false here, unlike %call1/%call3
271
+
272
+ %arrayidx4 = getelementptr inbounds i32* %a , i32 2
273
+ %i4 = load i32* %arrayidx4 , align 4
274
+ %arrayidx5 = getelementptr inbounds i32* %b , i32 2
275
+ %i5 = load i32* %arrayidx5 , align 4
276
+ %add3 = add i32 %i4 , %i5
277
+ %call3 = tail call i32 @llvm.cttz.i32 (i32 %add3 ,i1 true ) nounwind readnone
278
+
279
+ %arrayidx6 = getelementptr inbounds i32* %a , i32 3
280
+ %i6 = load i32* %arrayidx6 , align 4
281
+ %arrayidx7 = getelementptr inbounds i32* %b , i32 3
282
+ %i7 = load i32* %arrayidx7 , align 4
283
+ %add4 = add i32 %i6 , %i7
284
+ %call4 = tail call i32 @llvm.cttz.i32 (i32 %add4 ,i1 false ) nounwind readnone ; flag is false here, unlike %call1/%call3
285
+
286
+ store i32 %call1 , i32* %c , align 4
287
+ %arrayidx8 = getelementptr inbounds i32* %c , i32 1
288
+ store i32 %call2 , i32* %arrayidx8 , align 4
289
+ %arrayidx9 = getelementptr inbounds i32* %c , i32 2
290
+ store i32 %call3 , i32* %arrayidx9 , align 4
291
+ %arrayidx10 = getelementptr inbounds i32* %c , i32 3
292
+ store i32 %call4 , i32* %arrayidx10 , align 4
293
+ ret void
294
+
295
+ ; CHECK-LABEL: @vec_cttz_i32_neg(
296
+ ; CHECK-NOT: call <4 x i32> @llvm.cttz.v4i32
297
+ }
298
+
299
+
300
; Scalar powi intrinsic: float base, i32 exponent.
;
; @vec_powi_f32: positive test. Loads elements 0..3 of %a and %b, fadds each
; pair, calls @llvm.powi.f32 on every sum with the SAME exponent operand %P,
; and stores the four results to elements 0..3 of %c.
; The CHECK lines expect two <4 x float> loads, a single call to
; @llvm.powi.v4f32, and one <4 x float> store.
+ declare float @llvm.powi.f32 (float , i32 )
301
+ define void @vec_powi_f32 (float * %a , float * %b , float * %c , i32 %P ) {
302
+ entry:
303
+ %i0 = load float * %a , align 4
304
+ %i1 = load float * %b , align 4
305
+ %add1 = fadd float %i0 , %i1
306
+ %call1 = tail call float @llvm.powi.f32 (float %add1 ,i32 %P ) nounwind readnone
307
+
308
+ %arrayidx2 = getelementptr inbounds float * %a , i32 1
309
+ %i2 = load float * %arrayidx2 , align 4
310
+ %arrayidx3 = getelementptr inbounds float * %b , i32 1
311
+ %i3 = load float * %arrayidx3 , align 4
312
+ %add2 = fadd float %i2 , %i3
313
+ %call2 = tail call float @llvm.powi.f32 (float %add2 ,i32 %P ) nounwind readnone
314
+
315
+ %arrayidx4 = getelementptr inbounds float * %a , i32 2
316
+ %i4 = load float * %arrayidx4 , align 4
317
+ %arrayidx5 = getelementptr inbounds float * %b , i32 2
318
+ %i5 = load float * %arrayidx5 , align 4
319
+ %add3 = fadd float %i4 , %i5
320
+ %call3 = tail call float @llvm.powi.f32 (float %add3 ,i32 %P ) nounwind readnone
321
+
322
+ %arrayidx6 = getelementptr inbounds float * %a , i32 3
323
+ %i6 = load float * %arrayidx6 , align 4
324
+ %arrayidx7 = getelementptr inbounds float * %b , i32 3
325
+ %i7 = load float * %arrayidx7 , align 4
326
+ %add4 = fadd float %i6 , %i7
327
+ %call4 = tail call float @llvm.powi.f32 (float %add4 ,i32 %P ) nounwind readnone
328
+
329
+ store float %call1 , float * %c , align 4
330
+ %arrayidx8 = getelementptr inbounds float * %c , i32 1
331
+ store float %call2 , float * %arrayidx8 , align 4
332
+ %arrayidx9 = getelementptr inbounds float * %c , i32 2
333
+ store float %call3 , float * %arrayidx9 , align 4
334
+ %arrayidx10 = getelementptr inbounds float * %c , i32 3
335
+ store float %call4 , float * %arrayidx10 , align 4
336
+ ret void
337
+
338
+ ; CHECK-LABEL: @vec_powi_f32(
339
+ ; CHECK: load <4 x float>
340
+ ; CHECK: load <4 x float>
341
+ ; CHECK: call <4 x float> @llvm.powi.v4f32
342
+ ; CHECK: store <4 x float>
343
+ ; CHECK: ret
344
+ }
345
+
346
+
347
; @vec_powi_f32_neg: negative test. Same load/fadd/powi/store shape over
; elements 0..3 of %a, %b and %c as the positive powi test, except that the
; exponent operand alternates between the two i32 parameters: %P for
; %call1/%call3 and %Q for %call2/%call4.
; The CHECK-NOT line requires that no call to @llvm.powi.v4f32 appears after
; the function's label.
+ define void @vec_powi_f32_neg (float * %a , float * %b , float * %c , i32 %P , i32 %Q ) {
348
+ entry:
349
+ %i0 = load float * %a , align 4
350
+ %i1 = load float * %b , align 4
351
+ %add1 = fadd float %i0 , %i1
352
+ %call1 = tail call float @llvm.powi.f32 (float %add1 ,i32 %P ) nounwind readnone
353
+
354
+ %arrayidx2 = getelementptr inbounds float * %a , i32 1
355
+ %i2 = load float * %arrayidx2 , align 4
356
+ %arrayidx3 = getelementptr inbounds float * %b , i32 1
357
+ %i3 = load float * %arrayidx3 , align 4
358
+ %add2 = fadd float %i2 , %i3
359
+ %call2 = tail call float @llvm.powi.f32 (float %add2 ,i32 %Q ) nounwind readnone ; exponent is %Q here, unlike %call1/%call3 (%P)
360
+
361
+ %arrayidx4 = getelementptr inbounds float * %a , i32 2
362
+ %i4 = load float * %arrayidx4 , align 4
363
+ %arrayidx5 = getelementptr inbounds float * %b , i32 2
364
+ %i5 = load float * %arrayidx5 , align 4
365
+ %add3 = fadd float %i4 , %i5
366
+ %call3 = tail call float @llvm.powi.f32 (float %add3 ,i32 %P ) nounwind readnone
367
+
368
+ %arrayidx6 = getelementptr inbounds float * %a , i32 3
369
+ %i6 = load float * %arrayidx6 , align 4
370
+ %arrayidx7 = getelementptr inbounds float * %b , i32 3
371
+ %i7 = load float * %arrayidx7 , align 4
372
+ %add4 = fadd float %i6 , %i7
373
+ %call4 = tail call float @llvm.powi.f32 (float %add4 ,i32 %Q ) nounwind readnone ; exponent is %Q here, unlike %call1/%call3 (%P)
374
+
375
+ store float %call1 , float * %c , align 4
376
+ %arrayidx8 = getelementptr inbounds float * %c , i32 1
377
+ store float %call2 , float * %arrayidx8 , align 4
378
+ %arrayidx9 = getelementptr inbounds float * %c , i32 2
379
+ store float %call3 , float * %arrayidx9 , align 4
380
+ %arrayidx10 = getelementptr inbounds float * %c , i32 3
381
+ store float %call4 , float * %arrayidx10 , align 4
382
+ ret void
383
+
384
+ ; CHECK-LABEL: @vec_powi_f32_neg(
385
+ ; CHECK-NOT: call <4 x float> @llvm.powi.v4f32
386
+ }
0 commit comments