Skip to content

Commit afc7c63

Browse files
committed Feb 1, 2018
[X86][AVX512DQ] Add DQ var permute 256 tests as requested on D42487
llvm-svn: 323970
1 parent 44ef345 commit afc7c63

File tree

1 file changed

+462
-0
lines changed

1 file changed

+462
-0
lines changed
 

‎llvm/test/CodeGen/X86/var-permute-256.ll

+462
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,9 @@
22
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX1
33
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,INT256,AVX2
44
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,INT256,AVX512,AVX512F
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,INT256,AVX512,AVX512DQ
56
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,INT256,AVX512,AVX512VL
7+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,INT256,AVX512,AVX512VL,AVX512DQVL
68
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,INT256,AVX512,AVX512VLBW
79
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+avx512vbmi | FileCheck %s --check-prefixes=AVX,INT256,AVX512,AVX512VLBW,VBMI
810

@@ -88,6 +90,33 @@ define <4 x i64> @var_shuffle_v4i64(<4 x i64> %v, <4 x i64> %indices) nounwind {
8890
; AVX512F-NEXT: popq %rbp
8991
; AVX512F-NEXT: retq
9092
;
93+
; AVX512DQ-LABEL: var_shuffle_v4i64:
94+
; AVX512DQ: # %bb.0:
95+
; AVX512DQ-NEXT: pushq %rbp
96+
; AVX512DQ-NEXT: movq %rsp, %rbp
97+
; AVX512DQ-NEXT: andq $-32, %rsp
98+
; AVX512DQ-NEXT: subq $64, %rsp
99+
; AVX512DQ-NEXT: vmovq %xmm1, %rax
100+
; AVX512DQ-NEXT: andl $3, %eax
101+
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rcx
102+
; AVX512DQ-NEXT: andl $3, %ecx
103+
; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1
104+
; AVX512DQ-NEXT: vmovq %xmm1, %rdx
105+
; AVX512DQ-NEXT: andl $3, %edx
106+
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi
107+
; AVX512DQ-NEXT: andl $3, %esi
108+
; AVX512DQ-NEXT: vmovaps %ymm0, (%rsp)
109+
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
110+
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
111+
; AVX512DQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
112+
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
113+
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
114+
; AVX512DQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
115+
; AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
116+
; AVX512DQ-NEXT: movq %rbp, %rsp
117+
; AVX512DQ-NEXT: popq %rbp
118+
; AVX512DQ-NEXT: retq
119+
;
91120
; AVX512VL-LABEL: var_shuffle_v4i64:
92121
; AVX512VL: # %bb.0:
93122
; AVX512VL-NEXT: vpermpd %ymm0, %ymm1, %ymm0
@@ -371,6 +400,69 @@ define <16 x i16> @var_shuffle_v16i16(<16 x i16> %v, <16 x i16> %indices) nounwi
371400
; AVX512F-NEXT: popq %rbp
372401
; AVX512F-NEXT: retq
373402
;
403+
; AVX512DQ-LABEL: var_shuffle_v16i16:
404+
; AVX512DQ: # %bb.0:
405+
; AVX512DQ-NEXT: pushq %rbp
406+
; AVX512DQ-NEXT: movq %rsp, %rbp
407+
; AVX512DQ-NEXT: andq $-32, %rsp
408+
; AVX512DQ-NEXT: subq $64, %rsp
409+
; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2
410+
; AVX512DQ-NEXT: vmovd %xmm2, %eax
411+
; AVX512DQ-NEXT: vmovaps %ymm0, (%rsp)
412+
; AVX512DQ-NEXT: andl $15, %eax
413+
; AVX512DQ-NEXT: movzwl (%rsp,%rax,2), %eax
414+
; AVX512DQ-NEXT: vmovd %eax, %xmm0
415+
; AVX512DQ-NEXT: vpextrw $1, %xmm2, %eax
416+
; AVX512DQ-NEXT: andl $15, %eax
417+
; AVX512DQ-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm0, %xmm0
418+
; AVX512DQ-NEXT: vpextrw $2, %xmm2, %eax
419+
; AVX512DQ-NEXT: andl $15, %eax
420+
; AVX512DQ-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm0, %xmm0
421+
; AVX512DQ-NEXT: vpextrw $3, %xmm2, %eax
422+
; AVX512DQ-NEXT: andl $15, %eax
423+
; AVX512DQ-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm0, %xmm0
424+
; AVX512DQ-NEXT: vpextrw $4, %xmm2, %eax
425+
; AVX512DQ-NEXT: andl $15, %eax
426+
; AVX512DQ-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm0, %xmm0
427+
; AVX512DQ-NEXT: vpextrw $5, %xmm2, %eax
428+
; AVX512DQ-NEXT: andl $15, %eax
429+
; AVX512DQ-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm0, %xmm0
430+
; AVX512DQ-NEXT: vpextrw $6, %xmm2, %eax
431+
; AVX512DQ-NEXT: andl $15, %eax
432+
; AVX512DQ-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0
433+
; AVX512DQ-NEXT: vpextrw $7, %xmm2, %eax
434+
; AVX512DQ-NEXT: andl $15, %eax
435+
; AVX512DQ-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
436+
; AVX512DQ-NEXT: vmovd %xmm1, %eax
437+
; AVX512DQ-NEXT: andl $15, %eax
438+
; AVX512DQ-NEXT: movzwl (%rsp,%rax,2), %eax
439+
; AVX512DQ-NEXT: vmovd %eax, %xmm2
440+
; AVX512DQ-NEXT: vpextrw $1, %xmm1, %eax
441+
; AVX512DQ-NEXT: andl $15, %eax
442+
; AVX512DQ-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm2, %xmm2
443+
; AVX512DQ-NEXT: vpextrw $2, %xmm1, %eax
444+
; AVX512DQ-NEXT: andl $15, %eax
445+
; AVX512DQ-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm2, %xmm2
446+
; AVX512DQ-NEXT: vpextrw $3, %xmm1, %eax
447+
; AVX512DQ-NEXT: andl $15, %eax
448+
; AVX512DQ-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm2, %xmm2
449+
; AVX512DQ-NEXT: vpextrw $4, %xmm1, %eax
450+
; AVX512DQ-NEXT: andl $15, %eax
451+
; AVX512DQ-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm2, %xmm2
452+
; AVX512DQ-NEXT: vpextrw $5, %xmm1, %eax
453+
; AVX512DQ-NEXT: andl $15, %eax
454+
; AVX512DQ-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm2, %xmm2
455+
; AVX512DQ-NEXT: vpextrw $6, %xmm1, %eax
456+
; AVX512DQ-NEXT: andl $15, %eax
457+
; AVX512DQ-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm2, %xmm2
458+
; AVX512DQ-NEXT: vpextrw $7, %xmm1, %eax
459+
; AVX512DQ-NEXT: andl $15, %eax
460+
; AVX512DQ-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm2, %xmm1
461+
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
462+
; AVX512DQ-NEXT: movq %rbp, %rsp
463+
; AVX512DQ-NEXT: popq %rbp
464+
; AVX512DQ-NEXT: retq
465+
;
374466
; AVX512VL-LABEL: var_shuffle_v16i16:
375467
; AVX512VL: # %bb.0:
376468
; AVX512VL-NEXT: pushq %rbp
@@ -871,6 +963,133 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
871963
; AVX512F-NEXT: popq %rbp
872964
; AVX512F-NEXT: retq
873965
;
966+
; AVX512DQ-LABEL: var_shuffle_v32i8:
967+
; AVX512DQ: # %bb.0:
968+
; AVX512DQ-NEXT: pushq %rbp
969+
; AVX512DQ-NEXT: movq %rsp, %rbp
970+
; AVX512DQ-NEXT: andq $-32, %rsp
971+
; AVX512DQ-NEXT: subq $64, %rsp
972+
; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2
973+
; AVX512DQ-NEXT: vpextrb $0, %xmm2, %eax
974+
; AVX512DQ-NEXT: vmovaps %ymm0, (%rsp)
975+
; AVX512DQ-NEXT: andl $31, %eax
976+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
977+
; AVX512DQ-NEXT: vmovd %eax, %xmm0
978+
; AVX512DQ-NEXT: vpextrb $1, %xmm2, %eax
979+
; AVX512DQ-NEXT: andl $31, %eax
980+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
981+
; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
982+
; AVX512DQ-NEXT: vpextrb $2, %xmm2, %eax
983+
; AVX512DQ-NEXT: andl $31, %eax
984+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
985+
; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
986+
; AVX512DQ-NEXT: vpextrb $3, %xmm2, %eax
987+
; AVX512DQ-NEXT: andl $31, %eax
988+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
989+
; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
990+
; AVX512DQ-NEXT: vpextrb $4, %xmm2, %eax
991+
; AVX512DQ-NEXT: andl $31, %eax
992+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
993+
; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
994+
; AVX512DQ-NEXT: vpextrb $5, %xmm2, %eax
995+
; AVX512DQ-NEXT: andl $31, %eax
996+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
997+
; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
998+
; AVX512DQ-NEXT: vpextrb $6, %xmm2, %eax
999+
; AVX512DQ-NEXT: andl $31, %eax
1000+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
1001+
; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1002+
; AVX512DQ-NEXT: vpextrb $7, %xmm2, %eax
1003+
; AVX512DQ-NEXT: andl $31, %eax
1004+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
1005+
; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
1006+
; AVX512DQ-NEXT: vpextrb $8, %xmm2, %eax
1007+
; AVX512DQ-NEXT: andl $31, %eax
1008+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
1009+
; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
1010+
; AVX512DQ-NEXT: vpextrb $9, %xmm2, %eax
1011+
; AVX512DQ-NEXT: andl $31, %eax
1012+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
1013+
; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
1014+
; AVX512DQ-NEXT: vpextrb $10, %xmm2, %eax
1015+
; AVX512DQ-NEXT: andl $31, %eax
1016+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
1017+
; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1018+
; AVX512DQ-NEXT: vpextrb $11, %xmm2, %eax
1019+
; AVX512DQ-NEXT: andl $31, %eax
1020+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
1021+
; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
1022+
; AVX512DQ-NEXT: vpextrb $12, %xmm2, %eax
1023+
; AVX512DQ-NEXT: andl $31, %eax
1024+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
1025+
; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1026+
; AVX512DQ-NEXT: vpextrb $13, %xmm2, %eax
1027+
; AVX512DQ-NEXT: andl $31, %eax
1028+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
1029+
; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
1030+
; AVX512DQ-NEXT: vpextrb $14, %xmm2, %eax
1031+
; AVX512DQ-NEXT: andl $31, %eax
1032+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
1033+
; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1034+
; AVX512DQ-NEXT: vpextrb $15, %xmm2, %eax
1035+
; AVX512DQ-NEXT: andl $31, %eax
1036+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
1037+
; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
1038+
; AVX512DQ-NEXT: vpextrb $0, %xmm1, %eax
1039+
; AVX512DQ-NEXT: andl $31, %eax
1040+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
1041+
; AVX512DQ-NEXT: vmovd %eax, %xmm2
1042+
; AVX512DQ-NEXT: vpextrb $1, %xmm1, %eax
1043+
; AVX512DQ-NEXT: andl $31, %eax
1044+
; AVX512DQ-NEXT: vpinsrb $1, (%rsp,%rax), %xmm2, %xmm2
1045+
; AVX512DQ-NEXT: vpextrb $2, %xmm1, %eax
1046+
; AVX512DQ-NEXT: andl $31, %eax
1047+
; AVX512DQ-NEXT: vpinsrb $2, (%rsp,%rax), %xmm2, %xmm2
1048+
; AVX512DQ-NEXT: vpextrb $3, %xmm1, %eax
1049+
; AVX512DQ-NEXT: andl $31, %eax
1050+
; AVX512DQ-NEXT: vpinsrb $3, (%rsp,%rax), %xmm2, %xmm2
1051+
; AVX512DQ-NEXT: vpextrb $4, %xmm1, %eax
1052+
; AVX512DQ-NEXT: andl $31, %eax
1053+
; AVX512DQ-NEXT: vpinsrb $4, (%rsp,%rax), %xmm2, %xmm2
1054+
; AVX512DQ-NEXT: vpextrb $5, %xmm1, %eax
1055+
; AVX512DQ-NEXT: andl $31, %eax
1056+
; AVX512DQ-NEXT: vpinsrb $5, (%rsp,%rax), %xmm2, %xmm2
1057+
; AVX512DQ-NEXT: vpextrb $6, %xmm1, %eax
1058+
; AVX512DQ-NEXT: andl $31, %eax
1059+
; AVX512DQ-NEXT: vpinsrb $6, (%rsp,%rax), %xmm2, %xmm2
1060+
; AVX512DQ-NEXT: vpextrb $7, %xmm1, %eax
1061+
; AVX512DQ-NEXT: andl $31, %eax
1062+
; AVX512DQ-NEXT: vpinsrb $7, (%rsp,%rax), %xmm2, %xmm2
1063+
; AVX512DQ-NEXT: vpextrb $8, %xmm1, %eax
1064+
; AVX512DQ-NEXT: andl $31, %eax
1065+
; AVX512DQ-NEXT: vpinsrb $8, (%rsp,%rax), %xmm2, %xmm2
1066+
; AVX512DQ-NEXT: vpextrb $9, %xmm1, %eax
1067+
; AVX512DQ-NEXT: andl $31, %eax
1068+
; AVX512DQ-NEXT: vpinsrb $9, (%rsp,%rax), %xmm2, %xmm2
1069+
; AVX512DQ-NEXT: vpextrb $10, %xmm1, %eax
1070+
; AVX512DQ-NEXT: andl $31, %eax
1071+
; AVX512DQ-NEXT: vpinsrb $10, (%rsp,%rax), %xmm2, %xmm2
1072+
; AVX512DQ-NEXT: vpextrb $11, %xmm1, %eax
1073+
; AVX512DQ-NEXT: andl $31, %eax
1074+
; AVX512DQ-NEXT: vpinsrb $11, (%rsp,%rax), %xmm2, %xmm2
1075+
; AVX512DQ-NEXT: vpextrb $12, %xmm1, %eax
1076+
; AVX512DQ-NEXT: andl $31, %eax
1077+
; AVX512DQ-NEXT: vpinsrb $12, (%rsp,%rax), %xmm2, %xmm2
1078+
; AVX512DQ-NEXT: vpextrb $13, %xmm1, %eax
1079+
; AVX512DQ-NEXT: andl $31, %eax
1080+
; AVX512DQ-NEXT: vpinsrb $13, (%rsp,%rax), %xmm2, %xmm2
1081+
; AVX512DQ-NEXT: vpextrb $14, %xmm1, %eax
1082+
; AVX512DQ-NEXT: andl $31, %eax
1083+
; AVX512DQ-NEXT: vpinsrb $14, (%rsp,%rax), %xmm2, %xmm2
1084+
; AVX512DQ-NEXT: vpextrb $15, %xmm1, %eax
1085+
; AVX512DQ-NEXT: andl $31, %eax
1086+
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
1087+
; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
1088+
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1089+
; AVX512DQ-NEXT: movq %rbp, %rsp
1090+
; AVX512DQ-NEXT: popq %rbp
1091+
; AVX512DQ-NEXT: retq
1092+
;
8741093
; AVX512VL-LABEL: var_shuffle_v32i8:
8751094
; AVX512VL: # %bb.0:
8761095
; AVX512VL-NEXT: pushq %rbp
@@ -1177,6 +1396,31 @@ define <4 x double> @var_shuffle_v4f64(<4 x double> %v, <4 x i64> %indices) noun
11771396
; AVX512F-NEXT: popq %rbp
11781397
; AVX512F-NEXT: retq
11791398
;
1399+
; AVX512DQ-LABEL: var_shuffle_v4f64:
1400+
; AVX512DQ: # %bb.0:
1401+
; AVX512DQ-NEXT: pushq %rbp
1402+
; AVX512DQ-NEXT: movq %rsp, %rbp
1403+
; AVX512DQ-NEXT: andq $-32, %rsp
1404+
; AVX512DQ-NEXT: subq $64, %rsp
1405+
; AVX512DQ-NEXT: vmovq %xmm1, %rax
1406+
; AVX512DQ-NEXT: andl $3, %eax
1407+
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rcx
1408+
; AVX512DQ-NEXT: andl $3, %ecx
1409+
; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1
1410+
; AVX512DQ-NEXT: vmovq %xmm1, %rdx
1411+
; AVX512DQ-NEXT: andl $3, %edx
1412+
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi
1413+
; AVX512DQ-NEXT: andl $3, %esi
1414+
; AVX512DQ-NEXT: vmovaps %ymm0, (%rsp)
1415+
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1416+
; AVX512DQ-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1417+
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
1418+
; AVX512DQ-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
1419+
; AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1420+
; AVX512DQ-NEXT: movq %rbp, %rsp
1421+
; AVX512DQ-NEXT: popq %rbp
1422+
; AVX512DQ-NEXT: retq
1423+
;
11801424
; AVX512VL-LABEL: var_shuffle_v4f64:
11811425
; AVX512VL: # %bb.0:
11821426
; AVX512VL-NEXT: vpermpd %ymm0, %ymm1, %ymm0
@@ -1338,6 +1582,27 @@ define <4 x i64> @var_shuffle_v4i64_from_v2i64(<2 x i64> %v, <4 x i64> %indices)
13381582
; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
13391583
; AVX512F-NEXT: retq
13401584
;
1585+
; AVX512DQ-LABEL: var_shuffle_v4i64_from_v2i64:
1586+
; AVX512DQ: # %bb.0:
1587+
; AVX512DQ-NEXT: vmovq %xmm1, %rax
1588+
; AVX512DQ-NEXT: andl $1, %eax
1589+
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rcx
1590+
; AVX512DQ-NEXT: andl $1, %ecx
1591+
; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1
1592+
; AVX512DQ-NEXT: vmovq %xmm1, %rdx
1593+
; AVX512DQ-NEXT: andl $1, %edx
1594+
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi
1595+
; AVX512DQ-NEXT: andl $1, %esi
1596+
; AVX512DQ-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
1597+
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1598+
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
1599+
; AVX512DQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1600+
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
1601+
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
1602+
; AVX512DQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1603+
; AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1604+
; AVX512DQ-NEXT: retq
1605+
;
13411606
; AVX512VL-LABEL: var_shuffle_v4i64_from_v2i64:
13421607
; AVX512VL: # %bb.0:
13431608
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
@@ -1601,6 +1866,63 @@ define <16 x i16> @var_shuffle_v16i16_from_v8i16(<8 x i16> %v, <16 x i16> %indic
16011866
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
16021867
; AVX512F-NEXT: retq
16031868
;
1869+
; AVX512DQ-LABEL: var_shuffle_v16i16_from_v8i16:
1870+
; AVX512DQ: # %bb.0:
1871+
; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2
1872+
; AVX512DQ-NEXT: vmovd %xmm2, %eax
1873+
; AVX512DQ-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
1874+
; AVX512DQ-NEXT: andl $7, %eax
1875+
; AVX512DQ-NEXT: movzwl -24(%rsp,%rax,2), %eax
1876+
; AVX512DQ-NEXT: vmovd %eax, %xmm0
1877+
; AVX512DQ-NEXT: vpextrw $1, %xmm2, %eax
1878+
; AVX512DQ-NEXT: andl $7, %eax
1879+
; AVX512DQ-NEXT: vpinsrw $1, -24(%rsp,%rax,2), %xmm0, %xmm0
1880+
; AVX512DQ-NEXT: vpextrw $2, %xmm2, %eax
1881+
; AVX512DQ-NEXT: andl $7, %eax
1882+
; AVX512DQ-NEXT: vpinsrw $2, -24(%rsp,%rax,2), %xmm0, %xmm0
1883+
; AVX512DQ-NEXT: vpextrw $3, %xmm2, %eax
1884+
; AVX512DQ-NEXT: andl $7, %eax
1885+
; AVX512DQ-NEXT: vpinsrw $3, -24(%rsp,%rax,2), %xmm0, %xmm0
1886+
; AVX512DQ-NEXT: vpextrw $4, %xmm2, %eax
1887+
; AVX512DQ-NEXT: andl $7, %eax
1888+
; AVX512DQ-NEXT: vpinsrw $4, -24(%rsp,%rax,2), %xmm0, %xmm0
1889+
; AVX512DQ-NEXT: vpextrw $5, %xmm2, %eax
1890+
; AVX512DQ-NEXT: andl $7, %eax
1891+
; AVX512DQ-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm0, %xmm0
1892+
; AVX512DQ-NEXT: vpextrw $6, %xmm2, %eax
1893+
; AVX512DQ-NEXT: andl $7, %eax
1894+
; AVX512DQ-NEXT: vpinsrw $6, -24(%rsp,%rax,2), %xmm0, %xmm0
1895+
; AVX512DQ-NEXT: vpextrw $7, %xmm2, %eax
1896+
; AVX512DQ-NEXT: andl $7, %eax
1897+
; AVX512DQ-NEXT: vpinsrw $7, -24(%rsp,%rax,2), %xmm0, %xmm0
1898+
; AVX512DQ-NEXT: vmovd %xmm1, %eax
1899+
; AVX512DQ-NEXT: andl $7, %eax
1900+
; AVX512DQ-NEXT: movzwl -24(%rsp,%rax,2), %eax
1901+
; AVX512DQ-NEXT: vmovd %eax, %xmm2
1902+
; AVX512DQ-NEXT: vpextrw $1, %xmm1, %eax
1903+
; AVX512DQ-NEXT: andl $7, %eax
1904+
; AVX512DQ-NEXT: vpinsrw $1, -24(%rsp,%rax,2), %xmm2, %xmm2
1905+
; AVX512DQ-NEXT: vpextrw $2, %xmm1, %eax
1906+
; AVX512DQ-NEXT: andl $7, %eax
1907+
; AVX512DQ-NEXT: vpinsrw $2, -24(%rsp,%rax,2), %xmm2, %xmm2
1908+
; AVX512DQ-NEXT: vpextrw $3, %xmm1, %eax
1909+
; AVX512DQ-NEXT: andl $7, %eax
1910+
; AVX512DQ-NEXT: vpinsrw $3, -24(%rsp,%rax,2), %xmm2, %xmm2
1911+
; AVX512DQ-NEXT: vpextrw $4, %xmm1, %eax
1912+
; AVX512DQ-NEXT: andl $7, %eax
1913+
; AVX512DQ-NEXT: vpinsrw $4, -24(%rsp,%rax,2), %xmm2, %xmm2
1914+
; AVX512DQ-NEXT: vpextrw $5, %xmm1, %eax
1915+
; AVX512DQ-NEXT: andl $7, %eax
1916+
; AVX512DQ-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm2, %xmm2
1917+
; AVX512DQ-NEXT: vpextrw $6, %xmm1, %eax
1918+
; AVX512DQ-NEXT: andl $7, %eax
1919+
; AVX512DQ-NEXT: vpinsrw $6, -24(%rsp,%rax,2), %xmm2, %xmm2
1920+
; AVX512DQ-NEXT: vpextrw $7, %xmm1, %eax
1921+
; AVX512DQ-NEXT: andl $7, %eax
1922+
; AVX512DQ-NEXT: vpinsrw $7, -24(%rsp,%rax,2), %xmm2, %xmm1
1923+
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1924+
; AVX512DQ-NEXT: retq
1925+
;
16041926
; AVX512VL-LABEL: var_shuffle_v16i16_from_v8i16:
16051927
; AVX512VL: # %bb.0:
16061928
; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm2
@@ -2078,6 +2400,127 @@ define <32 x i8> @var_shuffle_v32i8_from_v16i8(<16 x i8> %v, <32 x i8> %indices)
20782400
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
20792401
; AVX512F-NEXT: retq
20802402
;
2403+
; AVX512DQ-LABEL: var_shuffle_v32i8_from_v16i8:
2404+
; AVX512DQ: # %bb.0:
2405+
; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2
2406+
; AVX512DQ-NEXT: vpextrb $0, %xmm2, %eax
2407+
; AVX512DQ-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
2408+
; AVX512DQ-NEXT: andl $15, %eax
2409+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2410+
; AVX512DQ-NEXT: vmovd %eax, %xmm0
2411+
; AVX512DQ-NEXT: vpextrb $1, %xmm2, %eax
2412+
; AVX512DQ-NEXT: andl $15, %eax
2413+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2414+
; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
2415+
; AVX512DQ-NEXT: vpextrb $2, %xmm2, %eax
2416+
; AVX512DQ-NEXT: andl $15, %eax
2417+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2418+
; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
2419+
; AVX512DQ-NEXT: vpextrb $3, %xmm2, %eax
2420+
; AVX512DQ-NEXT: andl $15, %eax
2421+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2422+
; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
2423+
; AVX512DQ-NEXT: vpextrb $4, %xmm2, %eax
2424+
; AVX512DQ-NEXT: andl $15, %eax
2425+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2426+
; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
2427+
; AVX512DQ-NEXT: vpextrb $5, %xmm2, %eax
2428+
; AVX512DQ-NEXT: andl $15, %eax
2429+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2430+
; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
2431+
; AVX512DQ-NEXT: vpextrb $6, %xmm2, %eax
2432+
; AVX512DQ-NEXT: andl $15, %eax
2433+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2434+
; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
2435+
; AVX512DQ-NEXT: vpextrb $7, %xmm2, %eax
2436+
; AVX512DQ-NEXT: andl $15, %eax
2437+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2438+
; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
2439+
; AVX512DQ-NEXT: vpextrb $8, %xmm2, %eax
2440+
; AVX512DQ-NEXT: andl $15, %eax
2441+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2442+
; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
2443+
; AVX512DQ-NEXT: vpextrb $9, %xmm2, %eax
2444+
; AVX512DQ-NEXT: andl $15, %eax
2445+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2446+
; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
2447+
; AVX512DQ-NEXT: vpextrb $10, %xmm2, %eax
2448+
; AVX512DQ-NEXT: andl $15, %eax
2449+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2450+
; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
2451+
; AVX512DQ-NEXT: vpextrb $11, %xmm2, %eax
2452+
; AVX512DQ-NEXT: andl $15, %eax
2453+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2454+
; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
2455+
; AVX512DQ-NEXT: vpextrb $12, %xmm2, %eax
2456+
; AVX512DQ-NEXT: andl $15, %eax
2457+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2458+
; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
2459+
; AVX512DQ-NEXT: vpextrb $13, %xmm2, %eax
2460+
; AVX512DQ-NEXT: andl $15, %eax
2461+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2462+
; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
2463+
; AVX512DQ-NEXT: vpextrb $14, %xmm2, %eax
2464+
; AVX512DQ-NEXT: andl $15, %eax
2465+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2466+
; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
2467+
; AVX512DQ-NEXT: vpextrb $15, %xmm2, %eax
2468+
; AVX512DQ-NEXT: andl $15, %eax
2469+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2470+
; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
2471+
; AVX512DQ-NEXT: vpextrb $0, %xmm1, %eax
2472+
; AVX512DQ-NEXT: andl $15, %eax
2473+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2474+
; AVX512DQ-NEXT: vmovd %eax, %xmm2
2475+
; AVX512DQ-NEXT: vpextrb $1, %xmm1, %eax
2476+
; AVX512DQ-NEXT: andl $15, %eax
2477+
; AVX512DQ-NEXT: vpinsrb $1, -24(%rsp,%rax), %xmm2, %xmm2
2478+
; AVX512DQ-NEXT: vpextrb $2, %xmm1, %eax
2479+
; AVX512DQ-NEXT: andl $15, %eax
2480+
; AVX512DQ-NEXT: vpinsrb $2, -24(%rsp,%rax), %xmm2, %xmm2
2481+
; AVX512DQ-NEXT: vpextrb $3, %xmm1, %eax
2482+
; AVX512DQ-NEXT: andl $15, %eax
2483+
; AVX512DQ-NEXT: vpinsrb $3, -24(%rsp,%rax), %xmm2, %xmm2
2484+
; AVX512DQ-NEXT: vpextrb $4, %xmm1, %eax
2485+
; AVX512DQ-NEXT: andl $15, %eax
2486+
; AVX512DQ-NEXT: vpinsrb $4, -24(%rsp,%rax), %xmm2, %xmm2
2487+
; AVX512DQ-NEXT: vpextrb $5, %xmm1, %eax
2488+
; AVX512DQ-NEXT: andl $15, %eax
2489+
; AVX512DQ-NEXT: vpinsrb $5, -24(%rsp,%rax), %xmm2, %xmm2
2490+
; AVX512DQ-NEXT: vpextrb $6, %xmm1, %eax
2491+
; AVX512DQ-NEXT: andl $15, %eax
2492+
; AVX512DQ-NEXT: vpinsrb $6, -24(%rsp,%rax), %xmm2, %xmm2
2493+
; AVX512DQ-NEXT: vpextrb $7, %xmm1, %eax
2494+
; AVX512DQ-NEXT: andl $15, %eax
2495+
; AVX512DQ-NEXT: vpinsrb $7, -24(%rsp,%rax), %xmm2, %xmm2
2496+
; AVX512DQ-NEXT: vpextrb $8, %xmm1, %eax
2497+
; AVX512DQ-NEXT: andl $15, %eax
2498+
; AVX512DQ-NEXT: vpinsrb $8, -24(%rsp,%rax), %xmm2, %xmm2
2499+
; AVX512DQ-NEXT: vpextrb $9, %xmm1, %eax
2500+
; AVX512DQ-NEXT: andl $15, %eax
2501+
; AVX512DQ-NEXT: vpinsrb $9, -24(%rsp,%rax), %xmm2, %xmm2
2502+
; AVX512DQ-NEXT: vpextrb $10, %xmm1, %eax
2503+
; AVX512DQ-NEXT: andl $15, %eax
2504+
; AVX512DQ-NEXT: vpinsrb $10, -24(%rsp,%rax), %xmm2, %xmm2
2505+
; AVX512DQ-NEXT: vpextrb $11, %xmm1, %eax
2506+
; AVX512DQ-NEXT: andl $15, %eax
2507+
; AVX512DQ-NEXT: vpinsrb $11, -24(%rsp,%rax), %xmm2, %xmm2
2508+
; AVX512DQ-NEXT: vpextrb $12, %xmm1, %eax
2509+
; AVX512DQ-NEXT: andl $15, %eax
2510+
; AVX512DQ-NEXT: vpinsrb $12, -24(%rsp,%rax), %xmm2, %xmm2
2511+
; AVX512DQ-NEXT: vpextrb $13, %xmm1, %eax
2512+
; AVX512DQ-NEXT: andl $15, %eax
2513+
; AVX512DQ-NEXT: vpinsrb $13, -24(%rsp,%rax), %xmm2, %xmm2
2514+
; AVX512DQ-NEXT: vpextrb $14, %xmm1, %eax
2515+
; AVX512DQ-NEXT: andl $15, %eax
2516+
; AVX512DQ-NEXT: vpinsrb $14, -24(%rsp,%rax), %xmm2, %xmm2
2517+
; AVX512DQ-NEXT: vpextrb $15, %xmm1, %eax
2518+
; AVX512DQ-NEXT: andl $15, %eax
2519+
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
2520+
; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
2521+
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
2522+
; AVX512DQ-NEXT: retq
2523+
;
20812524
; AVX512VL-LABEL: var_shuffle_v32i8_from_v16i8:
20822525
; AVX512VL: # %bb.0:
20832526
; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm2
@@ -2361,6 +2804,25 @@ define <4 x double> @var_shuffle_v4f64_from_v2f64(<2 x double> %v, <4 x i64> %in
23612804
; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
23622805
; AVX512F-NEXT: retq
23632806
;
2807+
; AVX512DQ-LABEL: var_shuffle_v4f64_from_v2f64:
2808+
; AVX512DQ: # %bb.0:
2809+
; AVX512DQ-NEXT: vmovq %xmm1, %rax
2810+
; AVX512DQ-NEXT: andl $1, %eax
2811+
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rcx
2812+
; AVX512DQ-NEXT: andl $1, %ecx
2813+
; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1
2814+
; AVX512DQ-NEXT: vmovq %xmm1, %rdx
2815+
; AVX512DQ-NEXT: andl $1, %edx
2816+
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi
2817+
; AVX512DQ-NEXT: andl $1, %esi
2818+
; AVX512DQ-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
2819+
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2820+
; AVX512DQ-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
2821+
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
2822+
; AVX512DQ-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
2823+
; AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2824+
; AVX512DQ-NEXT: retq
2825+
;
23642826
; AVX512VL-LABEL: var_shuffle_v4f64_from_v2f64:
23652827
; AVX512VL: # %bb.0:
23662828
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0

0 commit comments

Comments
 (0)
Please sign in to comment.