
Commit 72186a2 (committed Jun 20, 2019; 1 parent: 98a0ac5)

[SLP][X86] Add lookahead reordering tests from D60897

llvm-svn: 363925
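
This test drives the SLP vectorizer and verifies the output with FileCheck. To reproduce the checked output locally (assuming an LLVM build with opt and FileCheck on PATH), the RUN line in the file below expands to roughly:

    opt -slp-vectorizer -S -mtriple=x86_64-unknown-linux -mcpu=corei7-avx \
        < llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll \
      | FileCheck llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll

As the NOTE at the top of the file says, the CHECK lines are autogenerated by utils/update_test_checks.py, so they can be regenerated the same way after a codegen change.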

Diff for: llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll (+235 -3 lines)
@@ -1,7 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx | FileCheck %s
 ;
-; This checks the look-ahead operand reordering heuristic
+; This file tests the look-ahead operand reordering heuristic.
+;
+;
+; This checks that operand reordering will reorder the operands of the adds
+; by taking into consideration the instructions beyond the immediate
+; predecessors.
 ;
 ; A[0] B[0] C[0] D[0] C[1] D[1] A[1] B[1]
 ;  \  /   \  /   \  /   \  /
@@ -11,8 +16,8 @@
 ;      |              |
 ;     S[0]           S[1]
 ;
-define void @test(double* %array) {
-; CHECK-LABEL: @test(
+define void @lookahead_basic(double* %array) {
+; CHECK-LABEL: @lookahead_basic(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
 ; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
@@ -72,3 +77,230 @@ entry:
   store double %addCDAB_1, double *%idx1, align 8
   ret void
 }
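
Note on @lookahead_basic: per the diagram, lane 0 computes S[0] = (A[0] op B[0]) + (C[0] op D[0]) while lane 1 computes S[1] = (C[1] op D[1]) + (A[1] op B[1]), with the inner ops elided by the hunk above. Bundling the two final fadds with operands taken positionally would pair the A/B subtree of lane 0 with the C/D subtree of lane 1, whose loads are not consecutive. An illustrative sketch (not part of the test file) of what looking one level past the immediate operands reveals:

    ; positional pairing:  <(A[0] op B[0]), (C[1] op D[1])>  -> gathered operands
    ; swapped pairing:     <(A[0] op B[0]), (A[1] op B[1])>  -> loads vectorize as
    ;                      <A[0],A[1]>, <B[0],B[1]>, <C[0],C[1]>, <D[0],D[1]>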
+
+
+; Check whether the look-ahead operand reordering heuristic will avoid
+; bundling the alt opcodes. The vectorized code should have no shuffles.
+;
+; A[0] B[0] A[0] B[0] A[1] B[1] A[1] B[1]
+;  \  /   \  /   \  /   \  /
+;   +      -      -      +
+;    \    /        \    /
+;      +              +
+;      |              |
+;     S[0]           S[1]
+;
+define void @lookahead_alt1(double* %array) {
+; CHECK-LABEL: @lookahead_alt1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 3
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4
+; CHECK-NEXT:    [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5
+; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6
+; CHECK-NEXT:    [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %idx0 = getelementptr inbounds double, double* %array, i64 0
+  %idx1 = getelementptr inbounds double, double* %array, i64 1
+  %idx2 = getelementptr inbounds double, double* %array, i64 2
+  %idx3 = getelementptr inbounds double, double* %array, i64 3
+  %idx4 = getelementptr inbounds double, double* %array, i64 4
+  %idx5 = getelementptr inbounds double, double* %array, i64 5
+  %idx6 = getelementptr inbounds double, double* %array, i64 6
+  %idx7 = getelementptr inbounds double, double* %array, i64 7
+
+  %A_0 = load double, double *%idx0, align 8
+  %A_1 = load double, double *%idx1, align 8
+  %B_0 = load double, double *%idx2, align 8
+  %B_1 = load double, double *%idx3, align 8
+
+  %addAB_0_L = fadd fast double %A_0, %B_0
+  %subAB_0_R = fsub fast double %A_0, %B_0
+
+  %subAB_1_L = fsub fast double %A_1, %B_1
+  %addAB_1_R = fadd fast double %A_1, %B_1
+
+  %addABCD_0 = fadd fast double %addAB_0_L, %subAB_0_R
+  %addCDAB_1 = fadd fast double %subAB_1_L, %addAB_1_R
+
+  store double %addABCD_0, double *%idx0, align 8
+  store double %addCDAB_1, double *%idx1, align 8
+  ret void
+}
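
Note on @lookahead_alt1: the CHECK lines above contain no shufflevector because the operands of the two final fadds can be reordered so that one vector operand carries both fadd results and the other carries both fsub results, rather than an alternating add/sub bundle per lane:

    ; [[TMP5]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]   ; <A[0]+B[0], A[1]+B[1]>
    ; [[TMP4]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]   ; <A[0]-B[0], A[1]-B[1]>
    ; [[TMP6]] = fadd fast <2 x double> [[TMP5]], [[TMP4]]   ; both lanes, no shuffle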
+
+
+; This code should get vectorized all the way to the loads with shuffles for
+; the alt opcodes.
+;
+; A[0] B[0] C[0] D[0] C[1] D[1] A[1] B[1]
+;  \  /   \  /   \  /   \  /
+;   +      -      +      -
+;    \    /        \    /
+;      +              +
+;      |              |
+;     S[0]           S[1]
+;
+define void @lookahead_alt2(double* %array) {
+; CHECK-LABEL: @lookahead_alt2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 3
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4
+; CHECK-NEXT:    [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5
+; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6
+; CHECK-NEXT:    [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7
+; CHECK-NEXT:    [[A_0:%.*]] = load double, double* [[IDX0]], align 8
+; CHECK-NEXT:    [[A_1:%.*]] = load double, double* [[IDX1]], align 8
+; CHECK-NEXT:    [[B_0:%.*]] = load double, double* [[IDX2]], align 8
+; CHECK-NEXT:    [[B_1:%.*]] = load double, double* [[IDX3]], align 8
+; CHECK-NEXT:    [[C_0:%.*]] = load double, double* [[IDX4]], align 8
+; CHECK-NEXT:    [[C_1:%.*]] = load double, double* [[IDX5]], align 8
+; CHECK-NEXT:    [[D_0:%.*]] = load double, double* [[IDX6]], align 8
+; CHECK-NEXT:    [[D_1:%.*]] = load double, double* [[IDX7]], align 8
+; CHECK-NEXT:    [[ADDAB_0:%.*]] = fadd fast double [[A_0]], [[B_0]]
+; CHECK-NEXT:    [[SUBCD_0:%.*]] = fsub fast double [[C_0]], [[D_0]]
+; CHECK-NEXT:    [[ADDCD_1:%.*]] = fadd fast double [[C_1]], [[D_1]]
+; CHECK-NEXT:    [[SUBAB_1:%.*]] = fsub fast double [[A_1]], [[B_1]]
+; CHECK-NEXT:    [[ADDABCD_0:%.*]] = fadd fast double [[ADDAB_0]], [[SUBCD_0]]
+; CHECK-NEXT:    [[ADDCDAB_1:%.*]] = fadd fast double [[ADDCD_1]], [[SUBAB_1]]
+; CHECK-NEXT:    store double [[ADDABCD_0]], double* [[IDX0]], align 8
+; CHECK-NEXT:    store double [[ADDCDAB_1]], double* [[IDX1]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %idx0 = getelementptr inbounds double, double* %array, i64 0
+  %idx1 = getelementptr inbounds double, double* %array, i64 1
+  %idx2 = getelementptr inbounds double, double* %array, i64 2
+  %idx3 = getelementptr inbounds double, double* %array, i64 3
+  %idx4 = getelementptr inbounds double, double* %array, i64 4
+  %idx5 = getelementptr inbounds double, double* %array, i64 5
+  %idx6 = getelementptr inbounds double, double* %array, i64 6
+  %idx7 = getelementptr inbounds double, double* %array, i64 7
+
+  %A_0 = load double, double *%idx0, align 8
+  %A_1 = load double, double *%idx1, align 8
+  %B_0 = load double, double *%idx2, align 8
+  %B_1 = load double, double *%idx3, align 8
+  %C_0 = load double, double *%idx4, align 8
+  %C_1 = load double, double *%idx5, align 8
+  %D_0 = load double, double *%idx6, align 8
+  %D_1 = load double, double *%idx7, align 8
+
+  %addAB_0 = fadd fast double %A_0, %B_0
+  %subCD_0 = fsub fast double %C_0, %D_0
+
+  %addCD_1 = fadd fast double %C_1, %D_1
+  %subAB_1 = fsub fast double %A_1, %B_1
+
+  %addABCD_0 = fadd fast double %addAB_0, %subCD_0
+  %addCDAB_1 = fadd fast double %addCD_1, %subAB_1
+
+  store double %addABCD_0, double *%idx0, align 8
+  store double %addCDAB_1, double *%idx1, align 8
+  ret void
+}
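
Note on @lookahead_alt2: the comment describes the desired behavior, yet the autogenerated CHECK lines above record fully scalar output. Since this commit only adds tests from D60897, the checks presumably capture the behavior before that patch lands. Vectorizing down to the loads with alt opcodes would, roughly, emit the usual SLP alternate-opcode pattern (an illustrative sketch, not the committed checks):

    ; %vadd  = fadd fast <2 x double> %lhs, %rhs
    ; %vsub  = fsub fast <2 x double> %lhs, %rhs
    ; %blend = shufflevector <2 x double> %vadd, <2 x double> %vsub, <2 x i32> <i32 0, i32 3>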
+
+
+;
+; A[0] B[0] C[0] D[0] A[1] B[2] A[2] B[1]
+;  \  /   \  /    /  \  /   \  /
+;   -      -     U    -      -
+;    \    /            \    /
+;      +                  +
+;      |                  |
+;     S[0]               S[1]
+;
+; SLP should reorder the operands of the RHS add taking into consideration the cost of external uses.
+; It is more profitable to reorder the operands of the RHS add, because A[1] has an external use.
+
+define void @lookahead_external_uses(double* %A, double *%B, double *%C, double *%D, double *%S, double *%Ext1, double *%Ext2) {
+; CHECK-LABEL: @lookahead_external_uses(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
+; CHECK-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0
+; CHECK-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0
+; CHECK-NEXT:    [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0
+; CHECK-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
+; CHECK-NEXT:    [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
+; CHECK-NEXT:    [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
+; CHECK-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
+; CHECK-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
+; CHECK-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
+; CHECK-NEXT:    [[D0:%.*]] = load double, double* [[IDXD0]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[B2:%.*]] = load double, double* [[IDXB2]], align 8
+; CHECK-NEXT:    [[A2:%.*]] = load double, double* [[IDXA2]], align 8
+; CHECK-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B2]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A2]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> undef, double [[D0]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[B1]], i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = fsub fast <2 x double> [[TMP6]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP9]]
+; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
+; CHECK-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    store double [[TMP12]], double* [[EXT1:%.*]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %IdxA0 = getelementptr inbounds double, double* %A, i64 0
+  %IdxB0 = getelementptr inbounds double, double* %B, i64 0
+  %IdxC0 = getelementptr inbounds double, double* %C, i64 0
+  %IdxD0 = getelementptr inbounds double, double* %D, i64 0
+
+  %IdxA1 = getelementptr inbounds double, double* %A, i64 1
+  %IdxB2 = getelementptr inbounds double, double* %B, i64 2
+  %IdxA2 = getelementptr inbounds double, double* %A, i64 2
+  %IdxB1 = getelementptr inbounds double, double* %B, i64 1
+
+  %A0 = load double, double *%IdxA0, align 8
+  %B0 = load double, double *%IdxB0, align 8
+  %C0 = load double, double *%IdxC0, align 8
+  %D0 = load double, double *%IdxD0, align 8
+
+  %A1 = load double, double *%IdxA1, align 8
+  %B2 = load double, double *%IdxB2, align 8
+  %A2 = load double, double *%IdxA2, align 8
+  %B1 = load double, double *%IdxB1, align 8
+
+  %subA0B0 = fsub fast double %A0, %B0
+  %subC0D0 = fsub fast double %C0, %D0
+
+  %subA1B2 = fsub fast double %A1, %B2
+  %subA2B1 = fsub fast double %A2, %B1
+
+  %add0 = fadd fast double %subA0B0, %subC0D0
+  %add1 = fadd fast double %subA1B2, %subA2B1
+
+  %IdxS0 = getelementptr inbounds double, double* %S, i64 0
+  %IdxS1 = getelementptr inbounds double, double* %S, i64 1
+
+  store double %add0, double *%IdxS0, align 8
+  store double %add1, double *%IdxS1, align 8
+
+  ; External use
+  store double %A1, double *%Ext1, align 8
+  ret void
+}
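
Note on @lookahead_external_uses: in the vectorized CHECK lines above, %add0 and %add1 are bundled so that %subA0B0 pairs with %subA1B2, which lets A[0] and A[1] be loaded as a single <2 x double> (TMP1). Because %A1 also has a scalar external use, lane 1 must be re-extracted for the final store, and that extract cost is what the look-ahead heuristic weighs when picking this operand order:

    ; [[TMP1]]  = load <2 x double>, <2 x double>* [[TMP0]], align 8   ; <A[0], A[1]>
    ; [[TMP12]] = extractelement <2 x double> [[TMP1]], i32 1          ; A[1] again
    ; store double [[TMP12]], double* [[EXT1:%.*]], align 8            ; external use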
