@@ -251,3 +251,490 @@ define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, <
251
251
%res = fsub <8 x double > %y , %a2
252
252
ret <8 x double > %res
253
253
}
254
+
255
;
; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
;
258
+
259
define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_add_x_one_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_add_x_one_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_add_x_one_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %a = fadd <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <16 x float> %a, %y
  ret <16 x float> %m
}
280
+
281
define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_add_x_one:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_add_x_one:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_add_x_one:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %a = fadd <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %a
  ret <8 x double> %m
}
302
+
303
define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_add_x_negone_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %a = fadd <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %a, %y
  ret <16 x float> %m
}
324
+
325
define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_add_x_negone:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %a = fadd <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %m = fmul <8 x double> %y, %a
  ret <8 x double> %m
}
346
+
347
define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_sub_one_x_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}
368
+
369
define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_one_x:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %x
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}
390
+
391
define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_sub_negone_x_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <16 x float> <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}
412
+
413
define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_negone_x:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}
434
+
435
define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_sub_x_one_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}
456
+
457
define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_x_one:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}
478
+
479
define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_sub_x_negone_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}
500
+
501
define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_x_negone:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}
522
+
523
;
; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
;
526
+
527
define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
; FMA-LABEL: test_v16f32_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %ymm3, %ymm5, %ymm3
; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm4, %ymm2
; FMA-NEXT:    vfmadd213ps %ymm2, %ymm4, %ymm0
; FMA-NEXT:    vfmadd213ps %ymm3, %ymm5, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %ymm3, %ymm3, %ymm5, %ymm3
; FMA4-NEXT:    vfnmaddps %ymm2, %ymm2, %ymm4, %ymm2
; FMA4-NEXT:    vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps %zmm2, %zmm3
; AVX512-NEXT:    vfnmadd213ps %zmm1, %zmm1, %zmm3
; AVX512-NEXT:    vfmadd213ps %zmm3, %zmm2, %zmm0
; AVX512-NEXT:    retq
  %t1 = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
  %tx = fmul <16 x float> %x, %t
  %ty = fmul <16 x float> %y, %t1
  %r = fadd <16 x float> %tx, %ty
  ret <16 x float> %r
}
556
+
557
define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
; FMA-LABEL: test_v8f64_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm3, %ymm5, %ymm3
; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm4, %ymm2
; FMA-NEXT:    vfmadd213pd %ymm2, %ymm4, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm3, %ymm5, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm3, %ymm3, %ymm5, %ymm3
; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm2, %ymm4, %ymm2
; FMA4-NEXT:    vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps %zmm2, %zmm3
; AVX512-NEXT:    vfnmadd213pd %zmm1, %zmm1, %zmm3
; AVX512-NEXT:    vfmadd213pd %zmm3, %zmm2, %zmm0
; AVX512-NEXT:    retq
  %t1 = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
  %tx = fmul <8 x double> %x, %t
  %ty = fmul <8 x double> %y, %t1
  %r = fadd <8 x double> %tx, %ty
  ret <8 x double> %r
}
586
+
587
;
; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
;
590
+
591
define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
; FMA-LABEL: test_v16f32_fneg_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fneg_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fneg_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <16 x float> %a0, %a1
  %add = fadd <16 x float> %mul, %a2
  %neg = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <16 x float> %neg
}
613
+
614
define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
; FMA-LABEL: test_v8f64_fneg_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <8 x double> %a0, %a1
  %sub = fsub <8 x double> %mul, %a2
  %neg = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <8 x double> %neg
}
636
+
637
define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
; FMA-LABEL: test_v16f32_fneg_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fneg_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fneg_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <16 x float> %a0, %a1
  %neg0 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
  %add = fadd <16 x float> %neg0, %a2
  %neg1 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <16 x float> %neg1
}
660
+
661
define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
; FMA-LABEL: test_v8f64_fneg_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <8 x double> %a0, %a1
  %neg0 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
  %sub = fsub <8 x double> %neg0, %a2
  %neg1 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <8 x double> %neg1
}
684
+
685
;
; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
;
688
+
689
define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
; FMA-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; FMA:       # BB#0:
; FMA-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; FMA4:       # BB#0:
; FMA4-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA4-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT:    retq
  %m0 = fmul <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
  %m1 = fmul <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
  %a = fadd <16 x float> %m0, %m1
  ret <16 x float> %a
}
711
+
712
;
; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
;
715
+
716
define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float> %y) #0 {
; FMA-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %ymm2, %ymm0
; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, {{.*}}(%rip), %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, {{.*}}(%rip), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd231ps {{.*}}(%rip), %zmm0, %zmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %m0 = fmul <16 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
  %m1 = fmul <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
  %a = fadd <16 x float> %m1, %y
  ret <16 x float> %a
}
739
+
740
attributes #0 = { "unsafe-fp-math"="true" }