Skip to content

Commit 75001c9

Browse files
committed Sep 28, 2017
[X86] Adding more cases to horizontal [f]add/[f]sub for avx512.
Reviewers: jbhateja Reviewed By: jbhateja Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D38344 llvm-svn: 314385
1 parent 47ad1ef commit 75001c9

File tree

1 file changed

+127
-16
lines changed

1 file changed

+127
-16
lines changed
 

‎llvm/test/CodeGen/X86/avx512-hadd-hsub.ll

+127-16
Original file line number | Diff line number | Diff line change
@@ -111,10 +111,6 @@ define float @fhsub_16(<16 x float> %x225) {
111111
}
112112

113113
define <16 x i32> @hadd_16_3(<16 x i32> %x225, <16 x i32> %x227) {
114-
; CHECK-LABEL: hadd_16_3:
115-
; CHECK: # BB#0:
116-
; CHECK-NEXT: vphaddd %ymm1, %ymm0, %ymm0
117-
; CHECK-NEXT: retq
118114
; KNL-LABEL: hadd_16_3:
119115
; KNL: # BB#0:
120116
; KNL-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
@@ -138,10 +134,6 @@ define <16 x i32> @hadd_16_3(<16 x i32> %x225, <16 x i32> %x227) {
138134
}
139135

140136
define <16 x float> @fhadd_16_3(<16 x float> %x225, <16 x float> %x227) {
141-
; CHECK-LABEL: fhadd_16_3:
142-
; CHECK: # BB#0:
143-
; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0
144-
; CHECK-NEXT: retq
145137
; KNL-LABEL: fhadd_16_3:
146138
; KNL: # BB#0:
147139
; KNL-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
@@ -164,14 +156,6 @@ define <16 x float> @fhadd_16_3(<16 x float> %x225, <16 x float> %x227) {
164156
}
165157

166158
define <8 x double> @fhadd_16_4(<8 x double> %x225, <8 x double> %x227) {
167-
; CHECK-LABEL: fhadd_16_4:
168-
; CHECK: # BB#0:
169-
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
170-
; CHECK-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
171-
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
172-
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
173-
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
174-
; CHECK-NEXT: retq
175159
; KNL-LABEL: fhadd_16_4:
176160
; KNL: # BB#0:
177161
; KNL-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
@@ -190,3 +174,130 @@ define <8 x double> @fhadd_16_4(<8 x double> %x225, <8 x double> %x227) {
190174
%x229 = fadd <8 x double> %x226, %x228
191175
ret <8 x double> %x229
192176
}
177+
178+
; fadd_noundef_low: pairwise fadd of two <8 x double> inputs with no undef
; lanes in the shuffle masks, returning only the low four result lanes.
;   %x226 interleaves the even-indexed elements of %x225 and %x227.
;   %x228 interleaves the odd-indexed elements of %x225 and %x227.
;   %x229 adds the two shuffles lane-wise; %x230 keeps lanes 0-3.
; The KNL and SKX check lines both expect full-width zmm vunpcklpd/vunpckhpd
; followed by vaddpd, with only a register kill annotation before the return.
define <4 x double> @fadd_noundef_low(<8 x double> %x225, <8 x double> %x227) {
179+
; KNL-LABEL: fadd_noundef_low:
180+
; KNL: # BB#0:
181+
; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
182+
; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
183+
; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0
184+
; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
185+
; KNL-NEXT: retq
186+
;
187+
; SKX-LABEL: fadd_noundef_low:
188+
; SKX: # BB#0:
189+
; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
190+
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
191+
; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0
192+
; SKX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
193+
; SKX-NEXT: retq
194+
%x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
195+
%x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
196+
%x229 = fadd <8 x double> %x226, %x228
197+
%x230 = shufflevector <8 x double> %x229, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
198+
ret <4 x double> %x230
199+
}
200+
201+
; fadd_noundef_high: same even/odd interleave-and-fadd pattern on two
; <8 x double> inputs as a full-width pairwise add, but the final shuffle
; returns the high four result lanes (4-7) instead of the low ones.
;   %x226 interleaves the even-indexed elements of %x225 and %x227.
;   %x228 interleaves the odd-indexed elements of %x225 and %x227.
;   %x229 adds the two shuffles lane-wise; %x230 keeps lanes 4-7.
; The KNL and SKX check lines both expect zmm vunpcklpd/vunpckhpd, vaddpd,
; and then vextractf64x4 $1 to move the upper half of the sum into ymm0.
define <4 x double> @fadd_noundef_high(<8 x double> %x225, <8 x double> %x227) {
202+
; KNL-LABEL: fadd_noundef_high:
203+
; KNL: # BB#0:
204+
; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
205+
; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
206+
; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0
207+
; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
208+
; KNL-NEXT: retq
209+
;
210+
; SKX-LABEL: fadd_noundef_high:
211+
; SKX: # BB#0:
212+
; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
213+
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
214+
; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0
215+
; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0
216+
; SKX-NEXT: retq
217+
%x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
218+
%x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
219+
%x229 = fadd <8 x double> %x226, %x228
220+
%x230 = shufflevector <8 x double> %x229, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
221+
ret <4 x double> %x230
222+
}
223+
224+
225+
; hadd_16_3_sv: integer pairwise add of two <16 x i32> inputs, returning only
; the low eight result lanes through a trailing shufflevector.
;   %x226 gathers the even-indexed elements, two from %x225 then two from
;         %x227, for each group of four source elements.
;   %x228 gathers the odd-indexed elements in the same arrangement.
;   %x229 adds the two shuffles lane-wise; %x230 keeps lanes 0-7.
; The KNL and SKX check lines both expect two full-width zmm vshufps followed
; by vpaddd, with only a register kill annotation before the return.
define <8 x i32> @hadd_16_3_sv(<16 x i32> %x225, <16 x i32> %x227) {
226+
; KNL-LABEL: hadd_16_3_sv:
227+
; KNL: # BB#0:
228+
; KNL-NEXT: vshufps {{.*#+}} zmm2 = zmm0[0,2],zmm1[0,2],zmm0[4,6],zmm1[4,6],zmm0[8,10],zmm1[8,10],zmm0[12,14],zmm1[12,14]
229+
; KNL-NEXT: vshufps {{.*#+}} zmm0 = zmm0[1,3],zmm1[1,3],zmm0[5,7],zmm1[5,7],zmm0[9,11],zmm1[9,11],zmm0[13,15],zmm1[13,15]
230+
; KNL-NEXT: vpaddd %zmm0, %zmm2, %zmm0
231+
; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
232+
; KNL-NEXT: retq
233+
;
234+
; SKX-LABEL: hadd_16_3_sv:
235+
; SKX: # BB#0:
236+
; SKX-NEXT: vshufps {{.*#+}} zmm2 = zmm0[0,2],zmm1[0,2],zmm0[4,6],zmm1[4,6],zmm0[8,10],zmm1[8,10],zmm0[12,14],zmm1[12,14]
237+
; SKX-NEXT: vshufps {{.*#+}} zmm0 = zmm0[1,3],zmm1[1,3],zmm0[5,7],zmm1[5,7],zmm0[9,11],zmm1[9,11],zmm0[13,15],zmm1[13,15]
238+
; SKX-NEXT: vpaddd %zmm0, %zmm2, %zmm0
239+
; SKX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
240+
; SKX-NEXT: retq
241+
%x226 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18
242+
, i32 4, i32 6, i32 20, i32 22, i32 8, i32 10, i32 24, i32 26, i32 12, i32 14, i32 28, i32 30>
243+
%x228 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19
244+
, i32 5 , i32 7, i32 21, i32 23, i32 9, i32 11, i32 25, i32 27, i32 13, i32 15,
245+
i32 29, i32 31>
246+
%x229 = add <16 x i32> %x226, %x228
247+
%x230 = shufflevector <16 x i32> %x229, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4 ,i32 5, i32 6, i32 7>
248+
ret <8 x i32> %x230
249+
}
250+
251+
252+
; fadd_noundef_eel: even/odd interleave-and-fadd pattern on two <8 x double>
; inputs where only a single scalar, lane 0 of the sum, is returned via
; extractelement.
;   %x226 interleaves the even-indexed elements of %x225 and %x227.
;   %x228 interleaves the odd-indexed elements of %x225 and %x227.
;   %x229 adds the two shuffles lane-wise; %x230 extracts lane 0.
; The KNL and SKX check lines both expect zmm vunpcklpd/vunpckhpd and vaddpd
; with a register kill annotation for xmm0; the SKX lines additionally expect
; vzeroupper before the return.
define double @fadd_noundef_eel(<8 x double> %x225, <8 x double> %x227) {
253+
; KNL-LABEL: fadd_noundef_eel:
254+
; KNL: # BB#0:
255+
; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
256+
; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
257+
; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0
258+
; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
259+
; KNL-NEXT: retq
260+
;
261+
; SKX-LABEL: fadd_noundef_eel:
262+
; SKX: # BB#0:
263+
; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
264+
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
265+
; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0
266+
; SKX-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
267+
; SKX-NEXT: vzeroupper
268+
; SKX-NEXT: retq
269+
%x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
270+
%x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
271+
%x229 = fadd <8 x double> %x226, %x228
272+
%x230 = extractelement <8 x double> %x229, i32 0
273+
ret double %x230
274+
}
275+
276+
277+
278+
; fsub_noundef_ee: even/odd interleave-and-fsub pattern on two <8 x double>
; inputs where only a single scalar, lane 5 of the difference, is returned
; via extractelement.
;   %x226 interleaves the even-indexed elements of %x225 and %x227.
;   %x228 interleaves the odd-indexed elements of %x225 and %x227.
;   %x229 computes %x226 - %x228 lane-wise; %x230 extracts lane 5.
; The KNL and SKX check lines both expect zmm vunpcklpd/vunpckhpd and vsubpd,
; then vextractf32x4 $2 and a vpermilpd swap to bring the requested element
; into the low position of xmm0; the SKX lines additionally expect vzeroupper
; before the return.
define double @fsub_noundef_ee (<8 x double> %x225, <8 x double> %x227) {
279+
; KNL-LABEL: fsub_noundef_ee:
280+
; KNL: # BB#0:
281+
; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
282+
; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
283+
; KNL-NEXT: vsubpd %zmm0, %zmm2, %zmm0
284+
; KNL-NEXT: vextractf32x4 $2, %zmm0, %xmm0
285+
; KNL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
286+
; KNL-NEXT: retq
287+
;
288+
; SKX-LABEL: fsub_noundef_ee:
289+
; SKX: # BB#0:
290+
; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
291+
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
292+
; SKX-NEXT: vsubpd %zmm0, %zmm2, %zmm0
293+
; SKX-NEXT: vextractf32x4 $2, %zmm0, %xmm0
294+
; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
295+
; SKX-NEXT: vzeroupper
296+
; SKX-NEXT: retq
297+
%x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
298+
%x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
299+
%x229 = fsub <8 x double> %x226, %x228
300+
%x230 = extractelement <8 x double> %x229, i32 5
301+
ret double %x230
302+
}
303+

0 commit comments

Comments
 (0)
Please sign in to comment.