1
1
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s 2>&1 | FileCheck %s
2
2
3
- ; CHECK: remark: {{.*}}: loop not vectorized: value could not be identified as an induction or reduction variable
4
-
5
3
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
6
4
7
5
@f = common global i32 0 , align 4
@@ -11,13 +9,22 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
11
9
@b = common global i32 0 , align 4
12
10
@e = common global i32 0 , align 4
13
11
14
- ; We used to vectorize this loop. But it has a value that is used outside of the
12
+ ; It has a value that is used outside of the loop
15
13
; and is not a recognized reduction variable "tmp17".
14
+ ; However, tmp17 is a non-header phi which is an allowed exit.
16
15
17
- ; CHECK-LABEL: @main(
18
- ; CHECK-NOT: <2 x i32>
16
+ ; CHECK-LABEL: @test1(
17
+ ; CHECK: %vec.ind = phi <2 x i32>
18
+ ; CHECK: [[CMP:%[a-zA-Z0-9.]+]] = icmp sgt <2 x i32> %vec.ind, <i32 10, i32 10>
19
+ ; CHECK: %predphi = select <2 x i1> [[CMP]], <2 x i32> <i32 1, i32 1>, <2 x i32> zeroinitializer
20
+
21
+ ; CHECK-LABEL: middle.block:
22
+ ; CHECK: [[E1:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> %predphi, i32 1
23
+
24
+ ; CHECK-LABEL: f1.exit.loopexit:
25
+ ; CHECK: %.lcssa = phi i32 [ %tmp17, %bb16 ], [ [[E1]], %middle.block ]
19
26
20
- define i32 @main () {
27
+ define i32 @test1 () {
21
28
bb:
22
29
%b.promoted = load i32 , i32* @b , align 4
23
30
br label %.lr.ph.i
@@ -40,3 +47,221 @@ f1.exit.loopexit:
40
47
%.lcssa = phi i32 [ %tmp17 , %bb16 ]
41
48
ret i32 %.lcssa
42
49
}
50
+
51
+ ; non-hdr phi depends on header phi.
52
+ ; CHECK-LABEL: @test2(
53
+ ; CHECK: %vec.ind = phi <2 x i32>
54
+ ; CHECK: [[CMP:%[a-zA-Z0-9.]+]] = icmp sgt <2 x i32> %vec.ind, <i32 10, i32 10>
55
+ ; CHECK: %predphi = select <2 x i1> [[CMP]], <2 x i32> <i32 1, i32 1>, <2 x i32> %vec.ind
56
+
57
+ ; CHECK-LABEL: middle.block:
58
+ ; CHECK: [[E1:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> %predphi, i32 1
59
+
60
+ ; CHECK-LABEL: f1.exit.loopexit:
61
+ ; CHECK: %.lcssa = phi i32 [ %tmp17, %bb16 ], [ [[E1]], %middle.block ]
62
+ define i32 @test2 () {
63
+ bb:
64
+ %b.promoted = load i32 , i32* @b , align 4
65
+ br label %.lr.ph.i
66
+
67
+ .lr.ph.i:
68
+ %tmp8 = phi i32 [ %tmp18 , %bb16 ], [ %b.promoted , %bb ]
69
+ %tmp2 = icmp sgt i32 %tmp8 , 10
70
+ br i1 %tmp2 , label %bb16 , label %bb10
71
+
72
+ bb10:
73
+ br label %bb16
74
+
75
+ bb16:
76
+ %tmp17 = phi i32 [ %tmp8 , %bb10 ], [ 1 , %.lr.ph.i ]
77
+ %tmp18 = add nsw i32 %tmp8 , 1
78
+ %tmp19 = icmp slt i32 %tmp18 , 4
79
+ br i1 %tmp19 , label %.lr.ph.i , label %f1.exit.loopexit
80
+
81
+ f1.exit.loopexit:
82
+ %.lcssa = phi i32 [ %tmp17 , %bb16 ]
83
+ ret i32 %.lcssa
84
+ }
85
+
86
+ ; more than 2 incoming values for tmp17 phi that is used outside loop.
87
+ ; CHECK-LABEL: test3(
88
+ ; CHECK-LABEL: vector.body:
89
+ ; CHECK: %predphi = select <2 x i1> %{{.*}}, <2 x i32> <i32 1, i32 1>, <2 x i32> zeroinitializer
90
+ ; CHECK: %predphi1 = select <2 x i1> %{{.*}}, <2 x i32> <i32 2, i32 2>, <2 x i32> %predphi
91
+
92
+ ; CHECK-LABEL: middle.block:
93
+ ; CHECK: [[E1:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> %predphi1, i32 1
94
+
95
+ ; CHECK-LABEL: f1.exit.loopexit:
96
+ ; CHECK: phi i32 [ %tmp17, %bb16 ], [ [[E1]], %middle.block ]
97
+ define i32 @test3 (i32 %N ) {
98
+ bb:
99
+ %b.promoted = load i32 , i32* @b , align 4
100
+ br label %.lr.ph.i
101
+
102
+ .lr.ph.i:
103
+ %tmp8 = phi i32 [ %tmp18 , %bb16 ], [ %b.promoted , %bb ]
104
+ %tmp2 = icmp sgt i32 %tmp8 , 10
105
+ br i1 %tmp2 , label %bb16 , label %bb10
106
+
107
+ bb10:
108
+ %cmp = icmp sgt i32 %tmp8 , %N
109
+ br i1 %cmp , label %bb12 , label %bb16
110
+
111
+ bb12:
112
+ br label %bb16
113
+
114
+ bb16:
115
+ %tmp17 = phi i32 [ 0 , %bb10 ], [ 1 , %.lr.ph.i ], [ 2 , %bb12 ]
116
+ %tmp18 = add nsw i32 %tmp8 , 1
117
+ %tmp19 = icmp slt i32 %tmp18 , 4
118
+ br i1 %tmp19 , label %.lr.ph.i , label %f1.exit.loopexit
119
+
120
+ f1.exit.loopexit:
121
+ %.lcssa = phi i32 [ %tmp17 , %bb16 ]
122
+ ret i32 %.lcssa
123
+ }
124
+
125
+ ; more than one incoming value for outside user: %.lcssa
126
+ ; CHECK-LABEL: test4(
127
+ ; CHECK-LABEL: vector.body:
128
+ ; CHECK: %predphi = select <2 x i1>
129
+
130
+ ; CHECK-LABEL: middle.block:
131
+ ; CHECK: [[E1:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> %predphi, i32 1
132
+
133
+ ; CHECK-LABEL: f1.exit.loopexit.loopexit:
134
+ ; CHECK: %tmp17.lcssa = phi i32 [ %tmp17, %bb16 ], [ [[E1]], %middle.block ]
135
+ ; CHECK-NEXT: br label %f1.exit.loopexit
136
+
137
+ ; CHECK-LABEL: f1.exit.loopexit:
138
+ ; CHECK: %.lcssa = phi i32 [ 2, %bb ], [ %tmp17.lcssa, %f1.exit.loopexit.loopexit ]
139
+ define i32 @test4 (i32 %N ) {
140
+ bb:
141
+ %b.promoted = load i32 , i32* @b , align 4
142
+ %icmp = icmp slt i32 %b.promoted , %N
143
+ br i1 %icmp , label %f1.exit.loopexit , label %.lr.ph.i
144
+
145
+ .lr.ph.i:
146
+ %tmp8 = phi i32 [ %tmp18 , %bb16 ], [ %b.promoted , %bb ]
147
+ %tmp2 = icmp sgt i32 %tmp8 , 10
148
+ br i1 %tmp2 , label %bb16 , label %bb10
149
+
150
+ bb10:
151
+ br label %bb16
152
+
153
+ bb16:
154
+ %tmp17 = phi i32 [ 0 , %bb10 ], [ 1 , %.lr.ph.i ]
155
+ %tmp18 = add nsw i32 %tmp8 , 1
156
+ %tmp19 = icmp slt i32 %tmp18 , 4
157
+ br i1 %tmp19 , label %.lr.ph.i , label %f1.exit.loopexit
158
+
159
+ f1.exit.loopexit:
160
+ %.lcssa = phi i32 [ %tmp17 , %bb16 ], [ 2 , %bb ]
161
+ ret i32 %.lcssa
162
+ }
163
+
164
+ ; non hdr phi that depends on reduction and is used outside the loop.
165
+ ; reduction phis are only allowed to have bump or reduction operations as the inside user, so we should
166
+ ; not vectorize this.
167
+ ; CHECK-LABEL: reduction_sum(
168
+ ; CHECK-NOT: <2 x i32>
169
+ define i32 @reduction_sum (i32 %n , i32* noalias nocapture %A , i32* noalias nocapture %B ) nounwind uwtable readonly noinline ssp {
170
+ entry:
171
+ %c1 = icmp sgt i32 %n , 0
172
+ br i1 %c1 , label %header , label %._crit_edge
173
+
174
+ header: ; preds = %0, %.lr.ph
175
+ %indvars.iv = phi i64 [ %indvars.iv.next , %bb16 ], [ 0 , %entry ]
176
+ %sum.02 = phi i32 [ %c9 , %bb16 ], [ 0 , %entry ]
177
+ %c2 = getelementptr inbounds i32 , i32* %A , i64 %indvars.iv
178
+ %c3 = load i32 , i32* %c2 , align 4
179
+ %c4 = getelementptr inbounds i32 , i32* %B , i64 %indvars.iv
180
+ %c5 = load i32 , i32* %c4 , align 4
181
+ %tmp2 = icmp sgt i32 %sum.02 , 10
182
+ br i1 %tmp2 , label %bb16 , label %bb10
183
+
184
+ bb10:
185
+ br label %bb16
186
+
187
+ bb16:
188
+ %tmp17 = phi i32 [ %sum.02 , %bb10 ], [ 1 , %header ]
189
+ %c6 = trunc i64 %indvars.iv to i32
190
+ %c7 = add i32 %sum.02 , %c6
191
+ %c8 = add i32 %c7 , %c3
192
+ %c9 = add i32 %c8 , %c5
193
+ %indvars.iv.next = add i64 %indvars.iv , 1
194
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
195
+ %exitcond = icmp eq i32 %lftr.wideiv , %n
196
+ br i1 %exitcond , label %._crit_edge , label %header
197
+
198
+ ._crit_edge: ; preds = %.lr.ph, %0
199
+ %sum.0.lcssa = phi i32 [ 0 , %entry ], [ %c9 , %bb16 ]
200
+ %nonhdr.lcssa = phi i32 [ 1 , %entry ], [ %tmp17 , %bb16 ]
201
+ ret i32 %sum.0.lcssa
202
+ }
203
+
204
+ ; invalid cyclic dependency with header phi iv, which prevents iv from being
205
+ ; recognized as induction var.
206
+ ; cannot vectorize.
207
+ ; CHECK-LABEL: cyclic_dep_with_indvar(
208
+ ; CHECK-NOT: <2 x i32>
209
+ define i32 @cyclic_dep_with_indvar () {
210
+ bb:
211
+ %b.promoted = load i32 , i32* @b , align 4
212
+ br label %.lr.ph.i
213
+
214
+ .lr.ph.i:
215
+ %iv = phi i32 [ %ivnext , %bb16 ], [ %b.promoted , %bb ]
216
+ %tmp2 = icmp sgt i32 %iv , 10
217
+ br i1 %tmp2 , label %bb16 , label %bb10
218
+
219
+ bb10:
220
+ br label %bb16
221
+
222
+ bb16:
223
+ %tmp17 = phi i32 [ 0 , %bb10 ], [ %iv , %.lr.ph.i ]
224
+ %ivnext = add nsw i32 %tmp17 , 1
225
+ %tmp19 = icmp slt i32 %ivnext , 4
226
+ br i1 %tmp19 , label %.lr.ph.i , label %f1.exit.loopexit
227
+
228
+ f1.exit.loopexit:
229
+ %.lcssa = phi i32 [ %tmp17 , %bb16 ]
230
+ ret i32 %.lcssa
231
+ }
232
+
233
+ ; non-reduction phi 'tmp17' used outside loop has cyclic dependence with %x.05 phi
234
+ ; cannot vectorize.
235
+ ; CHECK-LABEL: not_valid_reduction(
236
+ ; CHECK-NOT: <2 x i32>
237
+ define i32 @not_valid_reduction (i32 %n , i32* noalias nocapture %A ) nounwind uwtable readonly {
238
+ entry:
239
+ %cmp4 = icmp sgt i32 %n , 0
240
+ br i1 %cmp4 , label %for.body , label %for.end
241
+
242
+ for.body: ; preds = %entry, %for.body
243
+ %indvars.iv = phi i64 [ %indvars.iv.next , %latch ], [ 0 , %entry ]
244
+ %x.05 = phi i32 [ %tmp17 , %latch ], [ 0 , %entry ]
245
+ %arrayidx = getelementptr inbounds i32 , i32* %A , i64 %indvars.iv
246
+ %tmp0 = load i32 , i32* %arrayidx , align 4
247
+ %tmp2 = icmp sgt i64 %indvars.iv , 10
248
+ %sub = sub nsw i32 %x.05 , %tmp0
249
+ br i1 %tmp2 , label %bb16 , label %bb10
250
+
251
+ bb10:
252
+ br label %bb16
253
+
254
+ bb16:
255
+ %tmp17 = phi i32 [ 1 , %bb10 ], [ %sub , %for.body ]
256
+ br label %latch
257
+
258
+ latch:
259
+ %indvars.iv.next = add i64 %indvars.iv , 1
260
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
261
+ %exitcond = icmp eq i32 %lftr.wideiv , %n
262
+ br i1 %exitcond , label %for.end , label %for.body
263
+
264
+ for.end: ; preds = %for.body, %entry
265
+ %x.0.lcssa = phi i32 [ 0 , %entry ], [ %tmp17 , %latch ]
266
+ ret i32 %x.0.lcssa
267
+ }
0 commit comments