@@ -3444,3 +3444,140 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
3444
3444
}
3445
3445
3446
3446
; Intrinsic used by the preceding test (whose body starts before this chunk).
; Per the signature: takes two <8 x i64> vectors, an i32 (presumably the
; comparison predicate — confirm against the test above) and an i8 write mask,
; and returns an i8 result mask.
declare i8 @llvm.x86.avx512.mask.ucmp.q.512 (<8 x i64 >, <8 x i64 >, i32 , i8 ) nounwind readnone
3447
; 512-bit masked f32x4 subvector broadcast: replicate a <4 x float> into all
; four 128-bit lanes of a <16 x float>, merging into %x2 under an i16 mask.
declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x float>, i16)

; Exercises the three masking modes in one function: unmasked (mask = -1),
; merge-masked (pass-through %x2), and zero-masked (pass-through zero).
; The fadds force all three results to be live so none is dead-code eliminated.
define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
  %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res1, %res2
  %res5 = fadd <16 x float> %res3, %res4
  ret <16 x float> %res5
}
3470
; Memory-operand form: the <4 x float> source comes from a load, so the
; backend is expected to fold it into a single masked vbroadcastf32x4 from
; memory (checked by the mem[0,1,2,3,...] shuffle comment below).
define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512_load(<4 x float>* %x0ptr, <16 x float> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512_load:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %x0 = load <4 x float>, <4 x float>* %x0ptr
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
  ret <16 x float> %res
}
3481
; 512-bit masked f64x4 subvector broadcast: replicate a <4 x double> into both
; 256-bit halves of an <8 x double>, merging into %x2 under an i8 mask.
declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x double>, i8)

; Same three-mode pattern as the f32x4 test: unmasked, merge-masked, and
; zero-masked calls combined with fadds so every result stays live.
define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vaddpd %zmm1, %zmm2, %zmm1
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
  %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res1, %res2
  %res5 = fadd <8 x double> %res3, %res4
  ret <8 x double> %res5
}
3503
; Memory-operand form: the <4 x double> source is loaded, so the broadcast
; should fold into a single masked vbroadcastf64x4 from memory.
define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512_load(<4 x double>* %x0ptr, <8 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512_load:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %x0 = load <4 x double>, <4 x double>* %x0ptr
  %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
  ret <8 x double> %res
}
3515
; 512-bit masked i32x4 subvector broadcast: integer analogue of the f32x4
; test — replicate a <4 x i32> into all four 128-bit lanes of a <16 x i32>.
declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32>, i16)

; Unmasked, merge-masked, and zero-masked calls, combined with integer adds
; so none of the three results is dead-code eliminated.
define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
  %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res1, %res2
  %res5 = add <16 x i32> %res3, %res4
  ret <16 x i32> %res5
}
3538
; Memory-operand form: the <4 x i32> source is loaded, so the broadcast
; should fold into a single masked vbroadcasti32x4 from memory.
define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512_load(<4 x i32>* %x0ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512_load:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %x0 = load <4 x i32>, <4 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
  ret <16 x i32> %res
}
3550
; 512-bit masked i64x4 subvector broadcast: integer analogue of the f64x4
; test — replicate a <4 x i64> into both 256-bit halves of an <8 x i64>.
declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>, i8)

; Unmasked, merge-masked, and zero-masked calls combined with vpaddq-style
; integer adds so all three results remain live.
define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vpaddq %zmm1, %zmm2, %zmm1
; CHECK-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
  %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res1, %res2
  %res5 = add <8 x i64> %res3, %res4
  ret <8 x i64> %res5
}
3572
; Memory-operand form: the <4 x i64> source is loaded, so the broadcast
; should fold into a single masked vbroadcasti64x4 from memory.
define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512_load(<4 x i64>* %x0ptr, <8 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512_load:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %x0 = load <4 x i64>, <4 x i64>* %x0ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
  ret <8 x i64> %res
}
0 commit comments