@@ -111,10 +111,6 @@ define float @fhsub_16(<16 x float> %x225) {
111
111
}
112
112
113
113
define <16 x i32 > @hadd_16_3 (<16 x i32 > %x225 , <16 x i32 > %x227 ) {
114
- ; CHECK-LABEL: hadd_16_3:
115
- ; CHECK: # BB#0:
116
- ; CHECK-NEXT: vphaddd %ymm1, %ymm0, %ymm0
117
- ; CHECK-NEXT: retq
118
114
; KNL-LABEL: hadd_16_3:
119
115
; KNL: # BB#0:
120
116
; KNL-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
@@ -138,10 +134,6 @@ define <16 x i32> @hadd_16_3(<16 x i32> %x225, <16 x i32> %x227) {
138
134
}
139
135
140
136
define <16 x float > @fhadd_16_3 (<16 x float > %x225 , <16 x float > %x227 ) {
141
- ; CHECK-LABEL: fhadd_16_3:
142
- ; CHECK: # BB#0:
143
- ; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0
144
- ; CHECK-NEXT: retq
145
137
; KNL-LABEL: fhadd_16_3:
146
138
; KNL: # BB#0:
147
139
; KNL-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
@@ -164,14 +156,6 @@ define <16 x float> @fhadd_16_3(<16 x float> %x225, <16 x float> %x227) {
164
156
}
165
157
166
158
define <8 x double > @fhadd_16_4 (<8 x double > %x225 , <8 x double > %x227 ) {
167
- ; CHECK-LABEL: fhadd_16_4:
168
- ; CHECK: # BB#0:
169
- ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
170
- ; CHECK-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
171
- ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
172
- ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
173
- ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
174
- ; CHECK-NEXT: retq
175
159
; KNL-LABEL: fhadd_16_4:
176
160
; KNL: # BB#0:
177
161
; KNL-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
@@ -190,3 +174,130 @@ define <8 x double> @fhadd_16_4(<8 x double> %x225, <8 x double> %x227) {
190
174
%x229 = fadd <8 x double > %x226 , %x228
191
175
ret <8 x double > %x229
192
176
}
177
+
178
+ define <4 x double > @fadd_noundef_low (<8 x double > %x225 , <8 x double > %x227 ) { ; Adds the even-indexed and odd-indexed elements of both inputs and returns result elements 0-3; neither shuffle mask has an undef lane. Both prefixes below expect two 512-bit unpack shuffles followed by vaddpd.
179
+ ; KNL-LABEL: fadd_noundef_low:
180
+ ; KNL: # BB#0:
181
+ ; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
182
+ ; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
183
+ ; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0
184
+ ; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
185
+ ; KNL-NEXT: retq
186
+ ;
187
+ ; SKX-LABEL: fadd_noundef_low:
188
+ ; SKX: # BB#0:
189
+ ; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
190
+ ; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
191
+ ; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0
192
+ ; SKX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
193
+ ; SKX-NEXT: retq
194
+ %x226 = shufflevector <8 x double > %x225 , <8 x double > %x227 , <8 x i32 > <i32 0 , i32 8 , i32 2 , i32 10 , i32 4 , i32 12 , i32 6 , i32 14 > ; even-indexed elements, alternating %x225 / %x227 (indices 8-15 select from %x227)
195
+ %x228 = shufflevector <8 x double > %x225 , <8 x double > %x227 , <8 x i32 > <i32 1 , i32 9 , i32 3 , i32 11 , i32 5 ,i32 13 , i32 7 , i32 15 > ; odd-indexed elements, alternating %x225 / %x227
196
+ %x229 = fadd <8 x double > %x226 , %x228 ; element-wise sum of the even and odd selections
197
+ %x230 = shufflevector <8 x double > %x229 , <8 x double > undef , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 > ; keep elements 0-3 (low half of the sum)
198
+ ret <4 x double > %x230
199
+ }
200
+
201
+ define <4 x double > @fadd_noundef_high (<8 x double > %x225 , <8 x double > %x227 ) { ; Same even/odd add as a full 8 x double sum, but returns result elements 4-7; both prefixes below expect a vextractf64x4 $1 after the 512-bit vaddpd.
202
+ ; KNL-LABEL: fadd_noundef_high:
203
+ ; KNL: # BB#0:
204
+ ; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
205
+ ; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
206
+ ; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0
207
+ ; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
208
+ ; KNL-NEXT: retq
209
+ ;
210
+ ; SKX-LABEL: fadd_noundef_high:
211
+ ; SKX: # BB#0:
212
+ ; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
213
+ ; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
214
+ ; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0
215
+ ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0
216
+ ; SKX-NEXT: retq
217
+ %x226 = shufflevector <8 x double > %x225 , <8 x double > %x227 , <8 x i32 > <i32 0 , i32 8 , i32 2 , i32 10 , i32 4 , i32 12 , i32 6 , i32 14 > ; even-indexed elements, alternating %x225 / %x227 (indices 8-15 select from %x227)
218
+ %x228 = shufflevector <8 x double > %x225 , <8 x double > %x227 , <8 x i32 > <i32 1 , i32 9 , i32 3 , i32 11 , i32 5 ,i32 13 , i32 7 , i32 15 > ; odd-indexed elements, alternating %x225 / %x227
219
+ %x229 = fadd <8 x double > %x226 , %x228 ; element-wise sum of the even and odd selections
220
+ %x230 = shufflevector <8 x double > %x229 , <8 x double > undef , <4 x i32 > <i32 4 , i32 5 , i32 6 , i32 7 > ; keep elements 4-7 (high half of the sum)
221
+ ret <4 x double > %x230
222
+ }
223
+
224
+
225
+ define <8 x i32 > @hadd_16_3_sv (<16 x i32 > %x225 , <16 x i32 > %x227 ) { ; Integer variant on 16 x i32 - adds even-indexed and odd-indexed element pairs of both inputs and returns result elements 0-7. Both prefixes below expect two 512-bit vshufps followed by vpaddd.
226
+ ; KNL-LABEL: hadd_16_3_sv:
227
+ ; KNL: # BB#0:
228
+ ; KNL-NEXT: vshufps {{.*#+}} zmm2 = zmm0[0,2],zmm1[0,2],zmm0[4,6],zmm1[4,6],zmm0[8,10],zmm1[8,10],zmm0[12,14],zmm1[12,14]
229
+ ; KNL-NEXT: vshufps {{.*#+}} zmm0 = zmm0[1,3],zmm1[1,3],zmm0[5,7],zmm1[5,7],zmm0[9,11],zmm1[9,11],zmm0[13,15],zmm1[13,15]
230
+ ; KNL-NEXT: vpaddd %zmm0, %zmm2, %zmm0
231
+ ; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
232
+ ; KNL-NEXT: retq
233
+ ;
234
+ ; SKX-LABEL: hadd_16_3_sv:
235
+ ; SKX: # BB#0:
236
+ ; SKX-NEXT: vshufps {{.*#+}} zmm2 = zmm0[0,2],zmm1[0,2],zmm0[4,6],zmm1[4,6],zmm0[8,10],zmm1[8,10],zmm0[12,14],zmm1[12,14]
237
+ ; SKX-NEXT: vshufps {{.*#+}} zmm0 = zmm0[1,3],zmm1[1,3],zmm0[5,7],zmm1[5,7],zmm0[9,11],zmm1[9,11],zmm0[13,15],zmm1[13,15]
238
+ ; SKX-NEXT: vpaddd %zmm0, %zmm2, %zmm0
239
+ ; SKX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
240
+ ; SKX-NEXT: retq
241
+ %x226 = shufflevector <16 x i32 > %x225 , <16 x i32 > %x227 , <16 x i32 > <i32 0 , i32 2 , i32 16 , i32 18
242
+ , i32 4 , i32 6 , i32 20 , i32 22 , i32 8 , i32 10 , i32 24 , i32 26 , i32 12 , i32 14 , i32 28 , i32 30 > ; even-indexed elements, two from %x225 then two from %x227 per group of four (indices 16-31 select from %x227)
243
+ %x228 = shufflevector <16 x i32 > %x225 , <16 x i32 > %x227 , <16 x i32 > <i32 1 , i32 3 , i32 17 , i32 19
244
+ , i32 5 , i32 7 , i32 21 , i32 23 , i32 9 , i32 11 , i32 25 , i32 27 , i32 13 , i32 15 ,
245
+ i32 29 , i32 31 > ; odd-indexed elements, same two-and-two grouping
246
+ %x229 = add <16 x i32 > %x226 , %x228 ; element-wise sum of the even and odd selections
247
+ %x230 = shufflevector <16 x i32 > %x229 , <16 x i32 > undef , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 ,i32 5 , i32 6 , i32 7 > ; keep elements 0-7 (low half of the sum)
248
+ ret <8 x i32 > %x230
249
+ }
250
+
251
+
252
+ define double @fadd_noundef_eel (<8 x double > %x225 , <8 x double > %x227 ) { ; Even/odd add on 8 x double that returns only scalar element 0 of the sum. Both prefixes below still expect the full 512-bit unpack + vaddpd sequence; only the SKX checks include a vzeroupper before the return.
253
+ ; KNL-LABEL: fadd_noundef_eel:
254
+ ; KNL: # BB#0:
255
+ ; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
256
+ ; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
257
+ ; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0
258
+ ; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
259
+ ; KNL-NEXT: retq
260
+ ;
261
+ ; SKX-LABEL: fadd_noundef_eel:
262
+ ; SKX: # BB#0:
263
+ ; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
264
+ ; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
265
+ ; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0
266
+ ; SKX-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
267
+ ; SKX-NEXT: vzeroupper
268
+ ; SKX-NEXT: retq
269
+ %x226 = shufflevector <8 x double > %x225 , <8 x double > %x227 , <8 x i32 > <i32 0 , i32 8 , i32 2 , i32 10 , i32 4 , i32 12 , i32 6 , i32 14 > ; even-indexed elements, alternating %x225 / %x227 (indices 8-15 select from %x227)
270
+ %x228 = shufflevector <8 x double > %x225 , <8 x double > %x227 , <8 x i32 > <i32 1 , i32 9 , i32 3 , i32 11 , i32 5 ,i32 13 , i32 7 , i32 15 > ; odd-indexed elements, alternating %x225 / %x227
271
+ %x229 = fadd <8 x double > %x226 , %x228 ; element-wise sum of the even and odd selections
272
+ %x230 = extractelement <8 x double > %x229 , i32 0 ; scalar result is element 0 of the sum
273
+ ret double %x230
274
+ }
275
+
276
+
277
+
278
+ define double @fsub_noundef_ee (<8 x double > %x225 , <8 x double > %x227 ) { ; Subtraction variant - even-indexed minus odd-indexed elements of both inputs, returning scalar element 5 of the difference. Both prefixes below expect vsubpd on zmm, then vextractf32x4 $2 and a vpermilpd swap to bring that element into the low position.
279
+ ; KNL-LABEL: fsub_noundef_ee:
280
+ ; KNL: # BB#0:
281
+ ; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
282
+ ; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
283
+ ; KNL-NEXT: vsubpd %zmm0, %zmm2, %zmm0
284
+ ; KNL-NEXT: vextractf32x4 $2, %zmm0, %xmm0
285
+ ; KNL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
286
+ ; KNL-NEXT: retq
287
+ ;
288
+ ; SKX-LABEL: fsub_noundef_ee:
289
+ ; SKX: # BB#0:
290
+ ; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
291
+ ; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
292
+ ; SKX-NEXT: vsubpd %zmm0, %zmm2, %zmm0
293
+ ; SKX-NEXT: vextractf32x4 $2, %zmm0, %xmm0
294
+ ; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
295
+ ; SKX-NEXT: vzeroupper
296
+ ; SKX-NEXT: retq
297
+ %x226 = shufflevector <8 x double > %x225 , <8 x double > %x227 , <8 x i32 > <i32 0 , i32 8 , i32 2 , i32 10 , i32 4 , i32 12 , i32 6 , i32 14 > ; even-indexed elements, alternating %x225 / %x227 (indices 8-15 select from %x227)
298
+ %x228 = shufflevector <8 x double > %x225 , <8 x double > %x227 , <8 x i32 > <i32 1 , i32 9 , i32 3 , i32 11 , i32 5 ,i32 13 , i32 7 , i32 15 > ; odd-indexed elements, alternating %x225 / %x227
299
+ %x229 = fsub <8 x double > %x226 , %x228 ; element-wise difference, even selection minus odd selection
300
+ %x230 = extractelement <8 x double > %x229 , i32 5 ; element 5 is the second double of 128-bit chunk 2 of the result
301
+ ret double %x230
302
+ }
303
+
0 commit comments