Skip to content

Commit d9bb73b

Browse files
committed Nov 28, 2015
[X86][FMA] Added 512-bit tests to match 128/256-bit tests coverage
As discussed on D14909.

llvm-svn: 254233
1 parent 82f663d commit d9bb73b

File tree

1 file changed

+487
-0
lines changed

1 file changed

+487
-0
lines changed
 

‎llvm/test/CodeGen/X86/fma_patterns_wide.ll

+487
Original file line numberDiff line numberDiff line change
@@ -251,3 +251,490 @@ define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, <
251251
%res = fsub <8 x double> %y, %a2
252252
ret <8 x double> %res
253253
}
;
; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
;
; Check (x + 1.0) * y is folded into a single FMA: x*y + y.
define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_add_x_one_y:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT: vfmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_mul_add_x_one_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_mul_add_x_one_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT: retq
  %a = fadd <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <16 x float> %a, %y
  ret <16 x float> %m
}
280+
281+
; Check y * (x + 1.0) is folded into a single FMA: x*y + y (commuted mul).
define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_add_x_one:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT: vfmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f64_mul_y_add_x_one:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_mul_y_add_x_one:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT: retq
  %a = fadd <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %a
  ret <8 x double> %m
}
302+
303+
; Check (x - 1.0) * y (written as x + -1.0) folds to FMSUB: x*y - y.
define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT: vfmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_mul_add_x_negone_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT: retq
  %a = fadd <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %a, %y
  ret <16 x float> %m
}
324+
325+
; Check y * (x + -1.0) folds to FMSUB: x*y - y (commuted mul).
define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT: vfmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_mul_y_add_x_negone:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT: retq
  %a = fadd <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %m = fmul <8 x double> %y, %a
  ret <8 x double> %m
}
346+
347+
; Check (1.0 - x) * y folds to FNMADD: -(x*y) + y.
define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT: vfnmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_mul_sub_one_x_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT: retq
  %s = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}
368+
369+
; Check y * (1.0 - x) folds to FNMADD: -(x*y) + y (commuted mul).
define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT: vfnmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_one_x:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT: retq
  %s = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %x
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}
390+
391+
; Check (-1.0 - x) * y folds to FNMSUB: -(x*y) - y.
define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA: # BB#0:
; FMA-NEXT: vfnmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT: vfnmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_mul_sub_negone_x_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT: retq
  %s = fsub <16 x float> <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}
412+
413+
; Check y * (-1.0 - x) folds to FNMSUB: -(x*y) - y (commuted mul).
define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA: # BB#0:
; FMA-NEXT: vfnmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT: vfnmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_negone_x:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT: retq
  %s = fsub <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}
434+
435+
; Check (x - 1.0) * y folds to FMSUB: x*y - y.
define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT: vfmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_mul_sub_x_one_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT: retq
  %s = fsub <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}
456+
457+
; Check y * (x - 1.0) folds to FMSUB: x*y - y (commuted mul).
define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT: vfmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_x_one:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT: retq
  %s = fsub <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}
478+
479+
; Check (x - -1.0) * y folds to FMADD: x*y + y.
define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT: vfmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_mul_sub_x_negone_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT: retq
  %s = fsub <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}
500+
501+
; Check y * (x - -1.0) folds to FMADD: x*y + y (commuted mul).
define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT: vfmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_x_negone:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT: retq
  %s = fsub <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}
;
; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
;
; Check linear interpolation x*t + (1.0-t)*y combines into FNMADD + FMADD.
define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
; FMA-LABEL: test_v16f32_interp:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213ps %ymm3, %ymm5, %ymm3
; FMA-NEXT: vfnmadd213ps %ymm2, %ymm4, %ymm2
; FMA-NEXT: vfmadd213ps %ymm2, %ymm4, %ymm0
; FMA-NEXT: vfmadd213ps %ymm3, %ymm5, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_interp:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddps %ymm3, %ymm3, %ymm5, %ymm3
; FMA4-NEXT: vfnmaddps %ymm2, %ymm2, %ymm4, %ymm2
; FMA4-NEXT: vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-NEXT: vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_interp:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps %zmm2, %zmm3
; AVX512-NEXT: vfnmadd213ps %zmm1, %zmm1, %zmm3
; AVX512-NEXT: vfmadd213ps %zmm3, %zmm2, %zmm0
; AVX512-NEXT: retq
  %t1 = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
  %tx = fmul <16 x float> %x, %t
  %ty = fmul <16 x float> %y, %t1
  %r = fadd <16 x float> %tx, %ty
  ret <16 x float> %r
}
556+
557+
; Check f64 linear interpolation x*t + (1.0-t)*y combines into FNMADD + FMADD.
define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
; FMA-LABEL: test_v8f64_interp:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213pd %ymm3, %ymm5, %ymm3
; FMA-NEXT: vfnmadd213pd %ymm2, %ymm4, %ymm2
; FMA-NEXT: vfmadd213pd %ymm2, %ymm4, %ymm0
; FMA-NEXT: vfmadd213pd %ymm3, %ymm5, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f64_interp:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddpd %ymm3, %ymm3, %ymm5, %ymm3
; FMA4-NEXT: vfnmaddpd %ymm2, %ymm2, %ymm4, %ymm2
; FMA4-NEXT: vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-NEXT: vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_interp:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps %zmm2, %zmm3
; AVX512-NEXT: vfnmadd213pd %zmm1, %zmm1, %zmm3
; AVX512-NEXT: vfmadd213pd %zmm3, %zmm2, %zmm0
; AVX512-NEXT: retq
  %t1 = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
  %tx = fmul <8 x double> %x, %t
  %ty = fmul <8 x double> %y, %t1
  %r = fadd <8 x double> %tx, %ty
  ret <8 x double> %r
}
;
; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
;
; Check -(x*y + z) folds to FNMSUB: -(x*y) - z. Requires unsafe-fp-math (#0).
define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
; FMA-LABEL: test_v16f32_fneg_fmadd:
; FMA: # BB#0:
; FMA-NEXT: vfnmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT: vfnmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_fneg_fmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fneg_fmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT: retq
  %mul = fmul <16 x float> %a0, %a1
  %add = fadd <16 x float> %mul, %a2
  %neg = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <16 x float> %neg
}
613+
614+
; Check -(x*y - z) folds to FNMADD: -(x*y) + z. Requires unsafe-fp-math (#0).
define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
; FMA-LABEL: test_v8f64_fneg_fmsub:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT: vfnmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f64_fneg_fmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_fneg_fmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT: retq
  %mul = fmul <8 x double> %a0, %a1
  %sub = fsub <8 x double> %mul, %a2
  %neg = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <8 x double> %neg
}
636+
637+
; Check -(-(x*y) + z) folds to FMSUB: x*y - z. Requires unsafe-fp-math (#0).
define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
; FMA-LABEL: test_v16f32_fneg_fnmadd:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT: vfmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_fneg_fnmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fneg_fnmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT: retq
  %mul = fmul <16 x float> %a0, %a1
  %neg0 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
  %add = fadd <16 x float> %neg0, %a2
  %neg1 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <16 x float> %neg1
}
660+
661+
; Check -(-(x*y) - z) folds to FMADD: x*y + z. Requires unsafe-fp-math (#0).
define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
; FMA-LABEL: test_v8f64_fneg_fnmsub:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT: vfmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f64_fneg_fnmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_fneg_fnmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT: retq
  %mul = fmul <8 x double> %a0, %a1
  %neg0 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
  %sub = fsub <8 x double> %neg0, %a2
  %neg1 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <8 x double> %neg1
}
;
; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
;
; Check x*c1 + x*c2 combines into a single multiply x*(c1+c2). Requires unsafe-fp-math (#0).
define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
; FMA-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; FMA: # BB#0:
; FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; FMA4: # BB#0:
; FMA4-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA4-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; AVX512: # BB#0:
; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: retq
  %m0 = fmul <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
  %m1 = fmul <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
  %a = fadd <16 x float> %m0, %m1
  ret <16 x float> %a
}
;
; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
;
; Check (x*c1)*c2 + y combines into one FMA with folded constant c1*c2. Requires unsafe-fp-math (#0).
define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float> %y) #0 {
; FMA-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA: # BB#0:
; FMA-NEXT: vfmadd132ps {{.*}}(%rip), %ymm2, %ymm0
; FMA-NEXT: vfmadd132ps {{.*}}(%rip), %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddps %ymm2, {{.*}}(%rip), %ymm0, %ymm0
; FMA4-NEXT: vfmaddps %ymm3, {{.*}}(%rip), %ymm1, %ymm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd231ps {{.*}}(%rip), %zmm0, %zmm1
; AVX512-NEXT: vmovaps %zmm1, %zmm0
; AVX512-NEXT: retq
  %m0 = fmul <16 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
  %m1 = fmul <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
  %a = fadd <16 x float> %m1, %y
  ret <16 x float> %a
}
; Function attribute group: enables the unsafe-fp-math reassociation the #0 tests rely on.
attributes #0 = { "unsafe-fp-math"="true" }

0 commit comments

Comments
 (0)
Please sign in to comment.