
Commit 700fdb1

Committed May 30, 2019
[NFC][Codegen] Add better test coverage for potential add/sub constant folding
This adds hopefully-full test coverage for all the possible permutations.

First op is one of:
* x + c1
* x - c1
* c1 - x

Second op is one of:
* + c2
* - c2
* c2 -

And thus 3*3=9 patterns. Some of them show missed constant-folds.

Without the previous patch (the revert), these tests were causing an endless DAGCombine loop. I really should have thought about this first :S

llvm-svn: 362110
1 parent 019d270 commit 700fdb1
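For context, the folds being probed here are plain reassociations of the splat constants. In the committed AArch64 checks below, (x + 8) + 2 is already folded into a single add of 10 (the movi v1.4s, #10), and 2 - (x + 8) becomes (-6) - x (mvni v1.4s, #5, i.e. ~5 = -6, which shows up as 4294967290 in the X86 checks). A minimal sketch of those two folds in LLVM IR, using hypothetical function names that are not part of the committed tests:

; Minimal sketch (hypothetical functions, not part of the committed tests).
define <4 x i32> @sketch_add_add(<4 x i32> %x) {
  ; (x + 8) + 2 reassociates to x + 10.
  %t0 = add <4 x i32> %x, <i32 8, i32 8, i32 8, i32 8>
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ; after folding this is equivalent to: add <4 x i32> %x, <i32 10, i32 10, i32 10, i32 10>
  ret <4 x i32> %t1
}

define <4 x i32> @sketch_const_minus_add(<4 x i32> %x) {
  ; 2 - (x + 8) folds to (2 - 8) - x = -6 - x.
  %t0 = add <4 x i32> %x, <i32 8, i32 8, i32 8, i32 8>
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
  ; after folding this is equivalent to: sub <4 x i32> <i32 -6, i32 -6, i32 -6, i32 -6>, %x
  ret <4 x i32> %t1
}

The remaining permutations in the tests exercise the same reassociation with sub in place of add; as the NOTE lines indicate, all CHECK lines were produced by utils/update_llc_test_checks.py.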

File tree

4 files changed (+1112, -292 lines)

 
@@ -0,0 +1,455 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s

declare void @use(<4 x i32> %arg)

; (x+c1)+c2

define <4 x i32> @add_const_add_const(<4 x i32> %arg) {
; CHECK-LABEL: add_const_add_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #10
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @add_const_add_const_extrause(<4 x i32> %arg) {
; CHECK-LABEL: add_const_add_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32 // =32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: bl use
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: movi v0.4s, #10
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: add sp, sp, #32 // =32
; CHECK-NEXT: ret
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @use(<4 x i32> %t0)
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @add_const_add_const_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: add_const_add_const_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
ret <4 x i32> %t1
}

; (x+c1)-c2

define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
; CHECK-LABEL: add_const_sub_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: movi v1.4s, #2
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
; CHECK-LABEL: add_const_sub_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32 // =32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: bl use
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: movi v0.4s, #2
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: add sp, sp, #32 // =32
; CHECK-NEXT: ret
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @use(<4 x i32> %t0)
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: add_const_sub_const_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI5_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
%t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}

; c2-(x+c1)

define <4 x i32> @add_const_const_sub(<4 x i32> %arg) {
; CHECK-LABEL: add_const_const_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: mvni v1.4s, #5
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}

define <4 x i32> @add_const_const_sub_extrause(<4 x i32> %arg) {
; CHECK-LABEL: add_const_const_sub_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32 // =32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: bl use
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: mvni v0.4s, #5
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: add sp, sp, #32 // =32
; CHECK-NEXT: ret
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @use(<4 x i32> %t0)
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}

define <4 x i32> @add_const_const_sub_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: add_const_const_sub_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI8_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0]
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
%t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}

; (x-c1)+c2

define <4 x i32> @sub_const_add_const(<4 x i32> %arg) {
; CHECK-LABEL: sub_const_add_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: movi v1.4s, #2
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
; CHECK-LABEL: sub_const_add_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32 // =32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: bl use
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: movi v0.4s, #2
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: add sp, sp, #32 // =32
; CHECK-NEXT: ret
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @use(<4 x i32> %t0)
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: sub_const_add_const_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI11_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0]
; CHECK-NEXT: adrp x8, .LCPI11_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_1]
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
; CHECK-NEXT: ret
%t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
ret <4 x i32> %t1
}

; (x-c1)-c2

define <4 x i32> @sub_const_sub_const(<4 x i32> %arg) {
; CHECK-LABEL: sub_const_sub_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: movi v1.4s, #2
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @sub_const_sub_const_extrause(<4 x i32> %arg) {
; CHECK-LABEL: sub_const_sub_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32 // =32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: bl use
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: movi v0.4s, #2
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: add sp, sp, #32 // =32
; CHECK-NEXT: ret
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @use(<4 x i32> %t0)
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @sub_const_sub_const_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: sub_const_sub_const_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI14_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
%t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}

; c2-(x-c1)

define <4 x i32> @sub_const_const_sub(<4 x i32> %arg) {
; CHECK-LABEL: sub_const_const_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #10
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}

define <4 x i32> @sub_const_const_sub_extrause(<4 x i32> %arg) {
; CHECK-LABEL: sub_const_const_sub_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32 // =32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: bl use
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: movi v0.4s, #2
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: add sp, sp, #32 // =32
; CHECK-NEXT: ret
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @use(<4 x i32> %t0)
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}

define <4 x i32> @sub_const_const_sub_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: sub_const_const_sub_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI17_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
%t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}

; (c1-x)+c2

define <4 x i32> @const_sub_add_const(<4 x i32> %arg) {
; CHECK-LABEL: const_sub_add_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #10
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @const_sub_add_const_extrause(<4 x i32> %arg) {
; CHECK-LABEL: const_sub_add_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32 // =32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: bl use
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: movi v0.4s, #10
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: add sp, sp, #32 // =32
; CHECK-NEXT: ret
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
call void @use(<4 x i32> %t0)
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @const_sub_add_const_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: const_sub_add_const_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI20_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0]
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
%t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
%t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
ret <4 x i32> %t1
}

; (c1-x)-c2

define <4 x i32> @const_sub_sub_const(<4 x i32> %arg) {
; CHECK-LABEL: const_sub_sub_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #2
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
; CHECK-LABEL: const_sub_sub_const_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32 // =32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: bl use
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: movi v0.4s, #2
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: add sp, sp, #32 // =32
; CHECK-NEXT: ret
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
call void @use(<4 x i32> %t0)
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @const_sub_sub_const_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: const_sub_sub_const_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI23_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0]
; CHECK-NEXT: adrp x8, .LCPI23_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_1]
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
; CHECK-NEXT: ret
%t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}

; c2-(c1-x)

define <4 x i32> @const_sub_const_sub(<4 x i32> %arg) {
; CHECK-LABEL: const_sub_const_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: movi v1.4s, #2
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}

define <4 x i32> @const_sub_const_sub_extrause(<4 x i32> %arg) {
; CHECK-LABEL: const_sub_const_sub_extrause:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32 // =32
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.4s, #8
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: bl use
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: movi v0.4s, #2
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: add sp, sp, #32 // =32
; CHECK-NEXT: ret
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
call void @use(<4 x i32> %t0)
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}

define <4 x i32> @const_sub_const_sub_nonsplat(<4 x i32> %arg) {
; CHECK-LABEL: const_sub_const_sub_nonsplat:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI26_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_0]
; CHECK-NEXT: adrp x8, .LCPI26_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_1]
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
; CHECK-NEXT: ret
%t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}

llvm/test/CodeGen/AArch64/vec_add.ll

This file was deleted (-126 lines).
@@ -0,0 +1,657 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,SSE,X86-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64,SSE,X64-SSE

declare void @use(<4 x i32> %arg)

; (x+c1)+c2

define <4 x i32> @add_const_add_const(<4 x i32> %arg) {
; X86-LABEL: add_const_add_const:
; X86: # %bb.0:
; X86-NEXT: paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: add_const_add_const:
; X64: # %bb.0:
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @add_const_add_const_extrause(<4 x i32> %arg) {
; X86-LABEL: add_const_add_const_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 32
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT: paddd %xmm1, %xmm0
; X86-NEXT: calll use
; X86-NEXT: movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT: paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT: addl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: add_const_add_const_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT: paddd %xmm1, %xmm0
; X64-NEXT: callq use
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @use(<4 x i32> %t0)
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @add_const_add_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: add_const_add_const_nonsplat:
; X86: # %bb.0:
; X86-NEXT: paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: add_const_add_const_nonsplat:
; X64: # %bb.0:
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
ret <4 x i32> %t1
}

; (x+c1)-c2

define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
; X86-LABEL: add_const_sub_const:
; X86: # %bb.0:
; X86-NEXT: paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: add_const_sub_const:
; X64: # %bb.0:
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
; X86-LABEL: add_const_sub_const_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 32
; X86-NEXT: paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: calll use
; X86-NEXT: movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT: addl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: add_const_sub_const_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: callq use
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @use(<4 x i32> %t0)
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: add_const_sub_const_nonsplat:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: add_const_sub_const_nonsplat:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}

; c2-(x+c1)

define <4 x i32> @add_const_const_sub(<4 x i32> %arg) {
; X86-LABEL: add_const_const_sub:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [4294967290,4294967290,4294967290,4294967290]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: add_const_const_sub:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [4294967290,4294967290,4294967290,4294967290]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}

define <4 x i32> @add_const_const_sub_extrause(<4 x i32> %arg) {
; X86-LABEL: add_const_const_sub_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 32
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT: paddd %xmm1, %xmm0
; X86-NEXT: calll use
; X86-NEXT: movdqa {{.*#+}} xmm0 = [4294967290,4294967290,4294967290,4294967290]
; X86-NEXT: movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: addl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: add_const_const_sub_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT: paddd %xmm1, %xmm0
; X64-NEXT: callq use
; X64-NEXT: movdqa {{.*#+}} xmm0 = [4294967290,4294967290,4294967290,4294967290]
; X64-NEXT: psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @use(<4 x i32> %t0)
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}

define <4 x i32> @add_const_const_sub_nonsplat(<4 x i32> %arg) {
; X86-LABEL: add_const_const_sub_nonsplat:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: add_const_const_sub_nonsplat:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}

; (x-c1)+c2

define <4 x i32> @sub_const_add_const(<4 x i32> %arg) {
; X86-LABEL: sub_const_add_const:
; X86: # %bb.0:
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT: paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sub_const_add_const:
; X64: # %bb.0:
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
; X86-LABEL: sub_const_add_const_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 32
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: calll use
; X86-NEXT: movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT: paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT: addl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: sub_const_add_const_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: callq use
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @use(<4 x i32> %t0)
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: sub_const_add_const_nonsplat:
; X86: # %bb.0:
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT: paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sub_const_add_const_nonsplat:
; X64: # %bb.0:
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
ret <4 x i32> %t1
}

; (x-c1)-c2

define <4 x i32> @sub_const_sub_const(<4 x i32> %arg) {
; X86-LABEL: sub_const_sub_const:
; X86: # %bb.0:
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sub_const_sub_const:
; X64: # %bb.0:
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @sub_const_sub_const_extrause(<4 x i32> %arg) {
; X86-LABEL: sub_const_sub_const_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 32
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: calll use
; X86-NEXT: movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT: addl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: sub_const_sub_const_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: callq use
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @use(<4 x i32> %t0)
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @sub_const_sub_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: sub_const_sub_const_nonsplat:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sub_const_sub_const_nonsplat:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}

; c2-(x-c1)

define <4 x i32> @sub_const_const_sub(<4 x i32> %arg) {
; X86-LABEL: sub_const_const_sub:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sub_const_const_sub:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}

define <4 x i32> @sub_const_const_sub_extrause(<4 x i32> %arg) {
; X86-LABEL: sub_const_const_sub_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 32
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: calll use
; X86-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X86-NEXT: movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: addl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: sub_const_const_sub_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: callq use
; X64-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X64-NEXT: psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @use(<4 x i32> %t0)
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}

define <4 x i32> @sub_const_const_sub_nonsplat(<4 x i32> %arg) {
; X86-LABEL: sub_const_const_sub_nonsplat:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sub_const_const_sub_nonsplat:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}

; (c1-x)+c2

define <4 x i32> @const_sub_add_const(<4 x i32> %arg) {
; X86-LABEL: const_sub_add_const:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: const_sub_add_const:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @const_sub_add_const_extrause(<4 x i32> %arg) {
; X86-LABEL: const_sub_add_const_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 32
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: calll use
; X86-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
; X86-NEXT: movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: addl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: const_sub_add_const_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT: psubd %xmm1, %xmm0
; X64-NEXT: callq use
; X64-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
; X64-NEXT: psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
call void @use(<4 x i32> %t0)
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @const_sub_add_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: const_sub_add_const_nonsplat:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: const_sub_add_const_nonsplat:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
%t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
ret <4 x i32> %t1
}

; (c1-x)-c2

define <4 x i32> @const_sub_sub_const(<4 x i32> %arg) {
; X86-LABEL: const_sub_sub_const:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [8,8,8,8]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: const_sub_sub_const:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [8,8,8,8]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: psubd {{.*}}(%rip), %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
; X86-LABEL: const_sub_sub_const_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 32
; X86-NEXT: movdqa {{.*#+}} xmm1 = [8,8,8,8]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqu %xmm1, (%esp) # 16-byte Spill
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: calll use
; X86-NEXT: movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT: addl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: const_sub_sub_const_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa {{.*#+}} xmm1 = [8,8,8,8]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: callq use
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
call void @use(<4 x i32> %t0)
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}

define <4 x i32> @const_sub_sub_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: const_sub_sub_const_nonsplat:
; X86: # %bb.0:
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT: paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: const_sub_sub_const_nonsplat:
; X64: # %bb.0:
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}

; c2-(c1-x)

define <4 x i32> @const_sub_const_sub(<4 x i32> %arg) {
; X86-LABEL: const_sub_const_sub:
; X86: # %bb.0:
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT: paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: const_sub_const_sub:
; X64: # %bb.0:
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}

define <4 x i32> @const_sub_const_sub_extrause(<4 x i32> %arg) {
; X86-LABEL: const_sub_const_sub_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 32
; X86-NEXT: movdqa {{.*#+}} xmm1 = [8,8,8,8]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqu %xmm1, (%esp) # 16-byte Spill
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: calll use
; X86-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X86-NEXT: movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: addl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: const_sub_const_sub_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa {{.*#+}} xmm1 = [8,8,8,8]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: callq use
; X64-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X64-NEXT: psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
call void @use(<4 x i32> %t0)
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}

define <4 x i32> @const_sub_const_sub_nonsplat(<4 x i32> %arg) {
; X86-LABEL: const_sub_const_sub_nonsplat:
; X86: # %bb.0:
; X86-NEXT: psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT: paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: const_sub_const_sub_nonsplat:
; X64: # %bb.0:
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}

llvm/test/CodeGen/X86/vec_add.ll

This file was deleted (-166 lines).
