|
2 | 2 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX1
|
3 | 3 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,INT256,AVX2
|
4 | 4 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,INT256,AVX512,AVX512F
|
| 5 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,INT256,AVX512,AVX512DQ |
5 | 6 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,INT256,AVX512,AVX512VL
|
| 7 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,INT256,AVX512,AVX512VL,AVX512DQVL |
6 | 8 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,INT256,AVX512,AVX512VLBW
|
7 | 9 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+avx512vbmi | FileCheck %s --check-prefixes=AVX,INT256,AVX512,AVX512VLBW,VBMI
|
8 | 10 |
|
@@ -88,6 +90,33 @@ define <4 x i64> @var_shuffle_v4i64(<4 x i64> %v, <4 x i64> %indices) nounwind {
|
88 | 90 | ; AVX512F-NEXT: popq %rbp
|
89 | 91 | ; AVX512F-NEXT: retq
|
90 | 92 | ;
|
| 93 | +; AVX512DQ-LABEL: var_shuffle_v4i64: |
| 94 | +; AVX512DQ: # %bb.0: |
| 95 | +; AVX512DQ-NEXT: pushq %rbp |
| 96 | +; AVX512DQ-NEXT: movq %rsp, %rbp |
| 97 | +; AVX512DQ-NEXT: andq $-32, %rsp |
| 98 | +; AVX512DQ-NEXT: subq $64, %rsp |
| 99 | +; AVX512DQ-NEXT: vmovq %xmm1, %rax |
| 100 | +; AVX512DQ-NEXT: andl $3, %eax |
| 101 | +; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rcx |
| 102 | +; AVX512DQ-NEXT: andl $3, %ecx |
| 103 | +; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1 |
| 104 | +; AVX512DQ-NEXT: vmovq %xmm1, %rdx |
| 105 | +; AVX512DQ-NEXT: andl $3, %edx |
| 106 | +; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi |
| 107 | +; AVX512DQ-NEXT: andl $3, %esi |
| 108 | +; AVX512DQ-NEXT: vmovaps %ymm0, (%rsp) |
| 109 | +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero |
| 110 | +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero |
| 111 | +; AVX512DQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] |
| 112 | +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero |
| 113 | +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero |
| 114 | +; AVX512DQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0] |
| 115 | +; AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 |
| 116 | +; AVX512DQ-NEXT: movq %rbp, %rsp |
| 117 | +; AVX512DQ-NEXT: popq %rbp |
| 118 | +; AVX512DQ-NEXT: retq |
| 119 | +; |
91 | 120 | ; AVX512VL-LABEL: var_shuffle_v4i64:
|
92 | 121 | ; AVX512VL: # %bb.0:
|
93 | 122 | ; AVX512VL-NEXT: vpermpd %ymm0, %ymm1, %ymm0
|
@@ -371,6 +400,69 @@ define <16 x i16> @var_shuffle_v16i16(<16 x i16> %v, <16 x i16> %indices) nounwi
|
371 | 400 | ; AVX512F-NEXT: popq %rbp
|
372 | 401 | ; AVX512F-NEXT: retq
|
373 | 402 | ;
|
| 403 | +; AVX512DQ-LABEL: var_shuffle_v16i16: |
| 404 | +; AVX512DQ: # %bb.0: |
| 405 | +; AVX512DQ-NEXT: pushq %rbp |
| 406 | +; AVX512DQ-NEXT: movq %rsp, %rbp |
| 407 | +; AVX512DQ-NEXT: andq $-32, %rsp |
| 408 | +; AVX512DQ-NEXT: subq $64, %rsp |
| 409 | +; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2 |
| 410 | +; AVX512DQ-NEXT: vmovd %xmm2, %eax |
| 411 | +; AVX512DQ-NEXT: vmovaps %ymm0, (%rsp) |
| 412 | +; AVX512DQ-NEXT: andl $15, %eax |
| 413 | +; AVX512DQ-NEXT: movzwl (%rsp,%rax,2), %eax |
| 414 | +; AVX512DQ-NEXT: vmovd %eax, %xmm0 |
| 415 | +; AVX512DQ-NEXT: vpextrw $1, %xmm2, %eax |
| 416 | +; AVX512DQ-NEXT: andl $15, %eax |
| 417 | +; AVX512DQ-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm0, %xmm0 |
| 418 | +; AVX512DQ-NEXT: vpextrw $2, %xmm2, %eax |
| 419 | +; AVX512DQ-NEXT: andl $15, %eax |
| 420 | +; AVX512DQ-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm0, %xmm0 |
| 421 | +; AVX512DQ-NEXT: vpextrw $3, %xmm2, %eax |
| 422 | +; AVX512DQ-NEXT: andl $15, %eax |
| 423 | +; AVX512DQ-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm0, %xmm0 |
| 424 | +; AVX512DQ-NEXT: vpextrw $4, %xmm2, %eax |
| 425 | +; AVX512DQ-NEXT: andl $15, %eax |
| 426 | +; AVX512DQ-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm0, %xmm0 |
| 427 | +; AVX512DQ-NEXT: vpextrw $5, %xmm2, %eax |
| 428 | +; AVX512DQ-NEXT: andl $15, %eax |
| 429 | +; AVX512DQ-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm0, %xmm0 |
| 430 | +; AVX512DQ-NEXT: vpextrw $6, %xmm2, %eax |
| 431 | +; AVX512DQ-NEXT: andl $15, %eax |
| 432 | +; AVX512DQ-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0 |
| 433 | +; AVX512DQ-NEXT: vpextrw $7, %xmm2, %eax |
| 434 | +; AVX512DQ-NEXT: andl $15, %eax |
| 435 | +; AVX512DQ-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0 |
| 436 | +; AVX512DQ-NEXT: vmovd %xmm1, %eax |
| 437 | +; AVX512DQ-NEXT: andl $15, %eax |
| 438 | +; AVX512DQ-NEXT: movzwl (%rsp,%rax,2), %eax |
| 439 | +; AVX512DQ-NEXT: vmovd %eax, %xmm2 |
| 440 | +; AVX512DQ-NEXT: vpextrw $1, %xmm1, %eax |
| 441 | +; AVX512DQ-NEXT: andl $15, %eax |
| 442 | +; AVX512DQ-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm2, %xmm2 |
| 443 | +; AVX512DQ-NEXT: vpextrw $2, %xmm1, %eax |
| 444 | +; AVX512DQ-NEXT: andl $15, %eax |
| 445 | +; AVX512DQ-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm2, %xmm2 |
| 446 | +; AVX512DQ-NEXT: vpextrw $3, %xmm1, %eax |
| 447 | +; AVX512DQ-NEXT: andl $15, %eax |
| 448 | +; AVX512DQ-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm2, %xmm2 |
| 449 | +; AVX512DQ-NEXT: vpextrw $4, %xmm1, %eax |
| 450 | +; AVX512DQ-NEXT: andl $15, %eax |
| 451 | +; AVX512DQ-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm2, %xmm2 |
| 452 | +; AVX512DQ-NEXT: vpextrw $5, %xmm1, %eax |
| 453 | +; AVX512DQ-NEXT: andl $15, %eax |
| 454 | +; AVX512DQ-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm2, %xmm2 |
| 455 | +; AVX512DQ-NEXT: vpextrw $6, %xmm1, %eax |
| 456 | +; AVX512DQ-NEXT: andl $15, %eax |
| 457 | +; AVX512DQ-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm2, %xmm2 |
| 458 | +; AVX512DQ-NEXT: vpextrw $7, %xmm1, %eax |
| 459 | +; AVX512DQ-NEXT: andl $15, %eax |
| 460 | +; AVX512DQ-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm2, %xmm1 |
| 461 | +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 |
| 462 | +; AVX512DQ-NEXT: movq %rbp, %rsp |
| 463 | +; AVX512DQ-NEXT: popq %rbp |
| 464 | +; AVX512DQ-NEXT: retq |
| 465 | +; |
374 | 466 | ; AVX512VL-LABEL: var_shuffle_v16i16:
|
375 | 467 | ; AVX512VL: # %bb.0:
|
376 | 468 | ; AVX512VL-NEXT: pushq %rbp
|
@@ -871,6 +963,133 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
|
871 | 963 | ; AVX512F-NEXT: popq %rbp
|
872 | 964 | ; AVX512F-NEXT: retq
|
873 | 965 | ;
|
| 966 | +; AVX512DQ-LABEL: var_shuffle_v32i8: |
| 967 | +; AVX512DQ: # %bb.0: |
| 968 | +; AVX512DQ-NEXT: pushq %rbp |
| 969 | +; AVX512DQ-NEXT: movq %rsp, %rbp |
| 970 | +; AVX512DQ-NEXT: andq $-32, %rsp |
| 971 | +; AVX512DQ-NEXT: subq $64, %rsp |
| 972 | +; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2 |
| 973 | +; AVX512DQ-NEXT: vpextrb $0, %xmm2, %eax |
| 974 | +; AVX512DQ-NEXT: vmovaps %ymm0, (%rsp) |
| 975 | +; AVX512DQ-NEXT: andl $31, %eax |
| 976 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 977 | +; AVX512DQ-NEXT: vmovd %eax, %xmm0 |
| 978 | +; AVX512DQ-NEXT: vpextrb $1, %xmm2, %eax |
| 979 | +; AVX512DQ-NEXT: andl $31, %eax |
| 980 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 981 | +; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 |
| 982 | +; AVX512DQ-NEXT: vpextrb $2, %xmm2, %eax |
| 983 | +; AVX512DQ-NEXT: andl $31, %eax |
| 984 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 985 | +; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 |
| 986 | +; AVX512DQ-NEXT: vpextrb $3, %xmm2, %eax |
| 987 | +; AVX512DQ-NEXT: andl $31, %eax |
| 988 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 989 | +; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 |
| 990 | +; AVX512DQ-NEXT: vpextrb $4, %xmm2, %eax |
| 991 | +; AVX512DQ-NEXT: andl $31, %eax |
| 992 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 993 | +; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 |
| 994 | +; AVX512DQ-NEXT: vpextrb $5, %xmm2, %eax |
| 995 | +; AVX512DQ-NEXT: andl $31, %eax |
| 996 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 997 | +; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 |
| 998 | +; AVX512DQ-NEXT: vpextrb $6, %xmm2, %eax |
| 999 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1000 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 1001 | +; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 |
| 1002 | +; AVX512DQ-NEXT: vpextrb $7, %xmm2, %eax |
| 1003 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1004 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 1005 | +; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 |
| 1006 | +; AVX512DQ-NEXT: vpextrb $8, %xmm2, %eax |
| 1007 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1008 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 1009 | +; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 |
| 1010 | +; AVX512DQ-NEXT: vpextrb $9, %xmm2, %eax |
| 1011 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1012 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 1013 | +; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 |
| 1014 | +; AVX512DQ-NEXT: vpextrb $10, %xmm2, %eax |
| 1015 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1016 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 1017 | +; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 |
| 1018 | +; AVX512DQ-NEXT: vpextrb $11, %xmm2, %eax |
| 1019 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1020 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 1021 | +; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 |
| 1022 | +; AVX512DQ-NEXT: vpextrb $12, %xmm2, %eax |
| 1023 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1024 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 1025 | +; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 |
| 1026 | +; AVX512DQ-NEXT: vpextrb $13, %xmm2, %eax |
| 1027 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1028 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 1029 | +; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 |
| 1030 | +; AVX512DQ-NEXT: vpextrb $14, %xmm2, %eax |
| 1031 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1032 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 1033 | +; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 |
| 1034 | +; AVX512DQ-NEXT: vpextrb $15, %xmm2, %eax |
| 1035 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1036 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 1037 | +; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 |
| 1038 | +; AVX512DQ-NEXT: vpextrb $0, %xmm1, %eax |
| 1039 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1040 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 1041 | +; AVX512DQ-NEXT: vmovd %eax, %xmm2 |
| 1042 | +; AVX512DQ-NEXT: vpextrb $1, %xmm1, %eax |
| 1043 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1044 | +; AVX512DQ-NEXT: vpinsrb $1, (%rsp,%rax), %xmm2, %xmm2 |
| 1045 | +; AVX512DQ-NEXT: vpextrb $2, %xmm1, %eax |
| 1046 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1047 | +; AVX512DQ-NEXT: vpinsrb $2, (%rsp,%rax), %xmm2, %xmm2 |
| 1048 | +; AVX512DQ-NEXT: vpextrb $3, %xmm1, %eax |
| 1049 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1050 | +; AVX512DQ-NEXT: vpinsrb $3, (%rsp,%rax), %xmm2, %xmm2 |
| 1051 | +; AVX512DQ-NEXT: vpextrb $4, %xmm1, %eax |
| 1052 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1053 | +; AVX512DQ-NEXT: vpinsrb $4, (%rsp,%rax), %xmm2, %xmm2 |
| 1054 | +; AVX512DQ-NEXT: vpextrb $5, %xmm1, %eax |
| 1055 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1056 | +; AVX512DQ-NEXT: vpinsrb $5, (%rsp,%rax), %xmm2, %xmm2 |
| 1057 | +; AVX512DQ-NEXT: vpextrb $6, %xmm1, %eax |
| 1058 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1059 | +; AVX512DQ-NEXT: vpinsrb $6, (%rsp,%rax), %xmm2, %xmm2 |
| 1060 | +; AVX512DQ-NEXT: vpextrb $7, %xmm1, %eax |
| 1061 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1062 | +; AVX512DQ-NEXT: vpinsrb $7, (%rsp,%rax), %xmm2, %xmm2 |
| 1063 | +; AVX512DQ-NEXT: vpextrb $8, %xmm1, %eax |
| 1064 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1065 | +; AVX512DQ-NEXT: vpinsrb $8, (%rsp,%rax), %xmm2, %xmm2 |
| 1066 | +; AVX512DQ-NEXT: vpextrb $9, %xmm1, %eax |
| 1067 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1068 | +; AVX512DQ-NEXT: vpinsrb $9, (%rsp,%rax), %xmm2, %xmm2 |
| 1069 | +; AVX512DQ-NEXT: vpextrb $10, %xmm1, %eax |
| 1070 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1071 | +; AVX512DQ-NEXT: vpinsrb $10, (%rsp,%rax), %xmm2, %xmm2 |
| 1072 | +; AVX512DQ-NEXT: vpextrb $11, %xmm1, %eax |
| 1073 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1074 | +; AVX512DQ-NEXT: vpinsrb $11, (%rsp,%rax), %xmm2, %xmm2 |
| 1075 | +; AVX512DQ-NEXT: vpextrb $12, %xmm1, %eax |
| 1076 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1077 | +; AVX512DQ-NEXT: vpinsrb $12, (%rsp,%rax), %xmm2, %xmm2 |
| 1078 | +; AVX512DQ-NEXT: vpextrb $13, %xmm1, %eax |
| 1079 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1080 | +; AVX512DQ-NEXT: vpinsrb $13, (%rsp,%rax), %xmm2, %xmm2 |
| 1081 | +; AVX512DQ-NEXT: vpextrb $14, %xmm1, %eax |
| 1082 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1083 | +; AVX512DQ-NEXT: vpinsrb $14, (%rsp,%rax), %xmm2, %xmm2 |
| 1084 | +; AVX512DQ-NEXT: vpextrb $15, %xmm1, %eax |
| 1085 | +; AVX512DQ-NEXT: andl $31, %eax |
| 1086 | +; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax |
| 1087 | +; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 |
| 1088 | +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 |
| 1089 | +; AVX512DQ-NEXT: movq %rbp, %rsp |
| 1090 | +; AVX512DQ-NEXT: popq %rbp |
| 1091 | +; AVX512DQ-NEXT: retq |
| 1092 | +; |
874 | 1093 | ; AVX512VL-LABEL: var_shuffle_v32i8:
|
875 | 1094 | ; AVX512VL: # %bb.0:
|
876 | 1095 | ; AVX512VL-NEXT: pushq %rbp
|
@@ -1177,6 +1396,31 @@ define <4 x double> @var_shuffle_v4f64(<4 x double> %v, <4 x i64> %indices) noun
|
1177 | 1396 | ; AVX512F-NEXT: popq %rbp
|
1178 | 1397 | ; AVX512F-NEXT: retq
|
1179 | 1398 | ;
|
| 1399 | +; AVX512DQ-LABEL: var_shuffle_v4f64: |
| 1400 | +; AVX512DQ: # %bb.0: |
| 1401 | +; AVX512DQ-NEXT: pushq %rbp |
| 1402 | +; AVX512DQ-NEXT: movq %rsp, %rbp |
| 1403 | +; AVX512DQ-NEXT: andq $-32, %rsp |
| 1404 | +; AVX512DQ-NEXT: subq $64, %rsp |
| 1405 | +; AVX512DQ-NEXT: vmovq %xmm1, %rax |
| 1406 | +; AVX512DQ-NEXT: andl $3, %eax |
| 1407 | +; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rcx |
| 1408 | +; AVX512DQ-NEXT: andl $3, %ecx |
| 1409 | +; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1 |
| 1410 | +; AVX512DQ-NEXT: vmovq %xmm1, %rdx |
| 1411 | +; AVX512DQ-NEXT: andl $3, %edx |
| 1412 | +; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi |
| 1413 | +; AVX512DQ-NEXT: andl $3, %esi |
| 1414 | +; AVX512DQ-NEXT: vmovaps %ymm0, (%rsp) |
| 1415 | +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero |
| 1416 | +; AVX512DQ-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] |
| 1417 | +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero |
| 1418 | +; AVX512DQ-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] |
| 1419 | +; AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 |
| 1420 | +; AVX512DQ-NEXT: movq %rbp, %rsp |
| 1421 | +; AVX512DQ-NEXT: popq %rbp |
| 1422 | +; AVX512DQ-NEXT: retq |
| 1423 | +; |
1180 | 1424 | ; AVX512VL-LABEL: var_shuffle_v4f64:
|
1181 | 1425 | ; AVX512VL: # %bb.0:
|
1182 | 1426 | ; AVX512VL-NEXT: vpermpd %ymm0, %ymm1, %ymm0
|
@@ -1338,6 +1582,27 @@ define <4 x i64> @var_shuffle_v4i64_from_v2i64(<2 x i64> %v, <4 x i64> %indices)
|
1338 | 1582 | ; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
1339 | 1583 | ; AVX512F-NEXT: retq
|
1340 | 1584 | ;
|
| 1585 | +; AVX512DQ-LABEL: var_shuffle_v4i64_from_v2i64: |
| 1586 | +; AVX512DQ: # %bb.0: |
| 1587 | +; AVX512DQ-NEXT: vmovq %xmm1, %rax |
| 1588 | +; AVX512DQ-NEXT: andl $1, %eax |
| 1589 | +; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rcx |
| 1590 | +; AVX512DQ-NEXT: andl $1, %ecx |
| 1591 | +; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1 |
| 1592 | +; AVX512DQ-NEXT: vmovq %xmm1, %rdx |
| 1593 | +; AVX512DQ-NEXT: andl $1, %edx |
| 1594 | +; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi |
| 1595 | +; AVX512DQ-NEXT: andl $1, %esi |
| 1596 | +; AVX512DQ-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| 1597 | +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero |
| 1598 | +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero |
| 1599 | +; AVX512DQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] |
| 1600 | +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero |
| 1601 | +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero |
| 1602 | +; AVX512DQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0] |
| 1603 | +; AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 |
| 1604 | +; AVX512DQ-NEXT: retq |
| 1605 | +; |
1341 | 1606 | ; AVX512VL-LABEL: var_shuffle_v4i64_from_v2i64:
|
1342 | 1607 | ; AVX512VL: # %bb.0:
|
1343 | 1608 | ; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
@@ -1601,6 +1866,63 @@ define <16 x i16> @var_shuffle_v16i16_from_v8i16(<8 x i16> %v, <16 x i16> %indic
|
1601 | 1866 | ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
1602 | 1867 | ; AVX512F-NEXT: retq
|
1603 | 1868 | ;
|
| 1869 | +; AVX512DQ-LABEL: var_shuffle_v16i16_from_v8i16: |
| 1870 | +; AVX512DQ: # %bb.0: |
| 1871 | +; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2 |
| 1872 | +; AVX512DQ-NEXT: vmovd %xmm2, %eax |
| 1873 | +; AVX512DQ-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| 1874 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1875 | +; AVX512DQ-NEXT: movzwl -24(%rsp,%rax,2), %eax |
| 1876 | +; AVX512DQ-NEXT: vmovd %eax, %xmm0 |
| 1877 | +; AVX512DQ-NEXT: vpextrw $1, %xmm2, %eax |
| 1878 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1879 | +; AVX512DQ-NEXT: vpinsrw $1, -24(%rsp,%rax,2), %xmm0, %xmm0 |
| 1880 | +; AVX512DQ-NEXT: vpextrw $2, %xmm2, %eax |
| 1881 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1882 | +; AVX512DQ-NEXT: vpinsrw $2, -24(%rsp,%rax,2), %xmm0, %xmm0 |
| 1883 | +; AVX512DQ-NEXT: vpextrw $3, %xmm2, %eax |
| 1884 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1885 | +; AVX512DQ-NEXT: vpinsrw $3, -24(%rsp,%rax,2), %xmm0, %xmm0 |
| 1886 | +; AVX512DQ-NEXT: vpextrw $4, %xmm2, %eax |
| 1887 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1888 | +; AVX512DQ-NEXT: vpinsrw $4, -24(%rsp,%rax,2), %xmm0, %xmm0 |
| 1889 | +; AVX512DQ-NEXT: vpextrw $5, %xmm2, %eax |
| 1890 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1891 | +; AVX512DQ-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm0, %xmm0 |
| 1892 | +; AVX512DQ-NEXT: vpextrw $6, %xmm2, %eax |
| 1893 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1894 | +; AVX512DQ-NEXT: vpinsrw $6, -24(%rsp,%rax,2), %xmm0, %xmm0 |
| 1895 | +; AVX512DQ-NEXT: vpextrw $7, %xmm2, %eax |
| 1896 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1897 | +; AVX512DQ-NEXT: vpinsrw $7, -24(%rsp,%rax,2), %xmm0, %xmm0 |
| 1898 | +; AVX512DQ-NEXT: vmovd %xmm1, %eax |
| 1899 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1900 | +; AVX512DQ-NEXT: movzwl -24(%rsp,%rax,2), %eax |
| 1901 | +; AVX512DQ-NEXT: vmovd %eax, %xmm2 |
| 1902 | +; AVX512DQ-NEXT: vpextrw $1, %xmm1, %eax |
| 1903 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1904 | +; AVX512DQ-NEXT: vpinsrw $1, -24(%rsp,%rax,2), %xmm2, %xmm2 |
| 1905 | +; AVX512DQ-NEXT: vpextrw $2, %xmm1, %eax |
| 1906 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1907 | +; AVX512DQ-NEXT: vpinsrw $2, -24(%rsp,%rax,2), %xmm2, %xmm2 |
| 1908 | +; AVX512DQ-NEXT: vpextrw $3, %xmm1, %eax |
| 1909 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1910 | +; AVX512DQ-NEXT: vpinsrw $3, -24(%rsp,%rax,2), %xmm2, %xmm2 |
| 1911 | +; AVX512DQ-NEXT: vpextrw $4, %xmm1, %eax |
| 1912 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1913 | +; AVX512DQ-NEXT: vpinsrw $4, -24(%rsp,%rax,2), %xmm2, %xmm2 |
| 1914 | +; AVX512DQ-NEXT: vpextrw $5, %xmm1, %eax |
| 1915 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1916 | +; AVX512DQ-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm2, %xmm2 |
| 1917 | +; AVX512DQ-NEXT: vpextrw $6, %xmm1, %eax |
| 1918 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1919 | +; AVX512DQ-NEXT: vpinsrw $6, -24(%rsp,%rax,2), %xmm2, %xmm2 |
| 1920 | +; AVX512DQ-NEXT: vpextrw $7, %xmm1, %eax |
| 1921 | +; AVX512DQ-NEXT: andl $7, %eax |
| 1922 | +; AVX512DQ-NEXT: vpinsrw $7, -24(%rsp,%rax,2), %xmm2, %xmm1 |
| 1923 | +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 |
| 1924 | +; AVX512DQ-NEXT: retq |
| 1925 | +; |
1604 | 1926 | ; AVX512VL-LABEL: var_shuffle_v16i16_from_v8i16:
|
1605 | 1927 | ; AVX512VL: # %bb.0:
|
1606 | 1928 | ; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm2
|
@@ -2078,6 +2400,127 @@ define <32 x i8> @var_shuffle_v32i8_from_v16i8(<16 x i8> %v, <32 x i8> %indices)
|
2078 | 2400 | ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
2079 | 2401 | ; AVX512F-NEXT: retq
|
2080 | 2402 | ;
|
| 2403 | +; AVX512DQ-LABEL: var_shuffle_v32i8_from_v16i8: |
| 2404 | +; AVX512DQ: # %bb.0: |
| 2405 | +; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2 |
| 2406 | +; AVX512DQ-NEXT: vpextrb $0, %xmm2, %eax |
| 2407 | +; AVX512DQ-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| 2408 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2409 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2410 | +; AVX512DQ-NEXT: vmovd %eax, %xmm0 |
| 2411 | +; AVX512DQ-NEXT: vpextrb $1, %xmm2, %eax |
| 2412 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2413 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2414 | +; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 |
| 2415 | +; AVX512DQ-NEXT: vpextrb $2, %xmm2, %eax |
| 2416 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2417 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2418 | +; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 |
| 2419 | +; AVX512DQ-NEXT: vpextrb $3, %xmm2, %eax |
| 2420 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2421 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2422 | +; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 |
| 2423 | +; AVX512DQ-NEXT: vpextrb $4, %xmm2, %eax |
| 2424 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2425 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2426 | +; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 |
| 2427 | +; AVX512DQ-NEXT: vpextrb $5, %xmm2, %eax |
| 2428 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2429 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2430 | +; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 |
| 2431 | +; AVX512DQ-NEXT: vpextrb $6, %xmm2, %eax |
| 2432 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2433 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2434 | +; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 |
| 2435 | +; AVX512DQ-NEXT: vpextrb $7, %xmm2, %eax |
| 2436 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2437 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2438 | +; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 |
| 2439 | +; AVX512DQ-NEXT: vpextrb $8, %xmm2, %eax |
| 2440 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2441 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2442 | +; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 |
| 2443 | +; AVX512DQ-NEXT: vpextrb $9, %xmm2, %eax |
| 2444 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2445 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2446 | +; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 |
| 2447 | +; AVX512DQ-NEXT: vpextrb $10, %xmm2, %eax |
| 2448 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2449 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2450 | +; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 |
| 2451 | +; AVX512DQ-NEXT: vpextrb $11, %xmm2, %eax |
| 2452 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2453 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2454 | +; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 |
| 2455 | +; AVX512DQ-NEXT: vpextrb $12, %xmm2, %eax |
| 2456 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2457 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2458 | +; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 |
| 2459 | +; AVX512DQ-NEXT: vpextrb $13, %xmm2, %eax |
| 2460 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2461 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2462 | +; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 |
| 2463 | +; AVX512DQ-NEXT: vpextrb $14, %xmm2, %eax |
| 2464 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2465 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2466 | +; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 |
| 2467 | +; AVX512DQ-NEXT: vpextrb $15, %xmm2, %eax |
| 2468 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2469 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2470 | +; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 |
| 2471 | +; AVX512DQ-NEXT: vpextrb $0, %xmm1, %eax |
| 2472 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2473 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2474 | +; AVX512DQ-NEXT: vmovd %eax, %xmm2 |
| 2475 | +; AVX512DQ-NEXT: vpextrb $1, %xmm1, %eax |
| 2476 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2477 | +; AVX512DQ-NEXT: vpinsrb $1, -24(%rsp,%rax), %xmm2, %xmm2 |
| 2478 | +; AVX512DQ-NEXT: vpextrb $2, %xmm1, %eax |
| 2479 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2480 | +; AVX512DQ-NEXT: vpinsrb $2, -24(%rsp,%rax), %xmm2, %xmm2 |
| 2481 | +; AVX512DQ-NEXT: vpextrb $3, %xmm1, %eax |
| 2482 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2483 | +; AVX512DQ-NEXT: vpinsrb $3, -24(%rsp,%rax), %xmm2, %xmm2 |
| 2484 | +; AVX512DQ-NEXT: vpextrb $4, %xmm1, %eax |
| 2485 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2486 | +; AVX512DQ-NEXT: vpinsrb $4, -24(%rsp,%rax), %xmm2, %xmm2 |
| 2487 | +; AVX512DQ-NEXT: vpextrb $5, %xmm1, %eax |
| 2488 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2489 | +; AVX512DQ-NEXT: vpinsrb $5, -24(%rsp,%rax), %xmm2, %xmm2 |
| 2490 | +; AVX512DQ-NEXT: vpextrb $6, %xmm1, %eax |
| 2491 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2492 | +; AVX512DQ-NEXT: vpinsrb $6, -24(%rsp,%rax), %xmm2, %xmm2 |
| 2493 | +; AVX512DQ-NEXT: vpextrb $7, %xmm1, %eax |
| 2494 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2495 | +; AVX512DQ-NEXT: vpinsrb $7, -24(%rsp,%rax), %xmm2, %xmm2 |
| 2496 | +; AVX512DQ-NEXT: vpextrb $8, %xmm1, %eax |
| 2497 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2498 | +; AVX512DQ-NEXT: vpinsrb $8, -24(%rsp,%rax), %xmm2, %xmm2 |
| 2499 | +; AVX512DQ-NEXT: vpextrb $9, %xmm1, %eax |
| 2500 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2501 | +; AVX512DQ-NEXT: vpinsrb $9, -24(%rsp,%rax), %xmm2, %xmm2 |
| 2502 | +; AVX512DQ-NEXT: vpextrb $10, %xmm1, %eax |
| 2503 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2504 | +; AVX512DQ-NEXT: vpinsrb $10, -24(%rsp,%rax), %xmm2, %xmm2 |
| 2505 | +; AVX512DQ-NEXT: vpextrb $11, %xmm1, %eax |
| 2506 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2507 | +; AVX512DQ-NEXT: vpinsrb $11, -24(%rsp,%rax), %xmm2, %xmm2 |
| 2508 | +; AVX512DQ-NEXT: vpextrb $12, %xmm1, %eax |
| 2509 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2510 | +; AVX512DQ-NEXT: vpinsrb $12, -24(%rsp,%rax), %xmm2, %xmm2 |
| 2511 | +; AVX512DQ-NEXT: vpextrb $13, %xmm1, %eax |
| 2512 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2513 | +; AVX512DQ-NEXT: vpinsrb $13, -24(%rsp,%rax), %xmm2, %xmm2 |
| 2514 | +; AVX512DQ-NEXT: vpextrb $14, %xmm1, %eax |
| 2515 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2516 | +; AVX512DQ-NEXT: vpinsrb $14, -24(%rsp,%rax), %xmm2, %xmm2 |
| 2517 | +; AVX512DQ-NEXT: vpextrb $15, %xmm1, %eax |
| 2518 | +; AVX512DQ-NEXT: andl $15, %eax |
| 2519 | +; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax |
| 2520 | +; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 |
| 2521 | +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 |
| 2522 | +; AVX512DQ-NEXT: retq |
| 2523 | +; |
2081 | 2524 | ; AVX512VL-LABEL: var_shuffle_v32i8_from_v16i8:
|
2082 | 2525 | ; AVX512VL: # %bb.0:
|
2083 | 2526 | ; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm2
|
@@ -2361,6 +2804,25 @@ define <4 x double> @var_shuffle_v4f64_from_v2f64(<2 x double> %v, <4 x i64> %in
|
2361 | 2804 | ; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
2362 | 2805 | ; AVX512F-NEXT: retq
|
2363 | 2806 | ;
|
| 2807 | +; AVX512DQ-LABEL: var_shuffle_v4f64_from_v2f64: |
| 2808 | +; AVX512DQ: # %bb.0: |
| 2809 | +; AVX512DQ-NEXT: vmovq %xmm1, %rax |
| 2810 | +; AVX512DQ-NEXT: andl $1, %eax |
| 2811 | +; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rcx |
| 2812 | +; AVX512DQ-NEXT: andl $1, %ecx |
| 2813 | +; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1 |
| 2814 | +; AVX512DQ-NEXT: vmovq %xmm1, %rdx |
| 2815 | +; AVX512DQ-NEXT: andl $1, %edx |
| 2816 | +; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi |
| 2817 | +; AVX512DQ-NEXT: andl $1, %esi |
| 2818 | +; AVX512DQ-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| 2819 | +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero |
| 2820 | +; AVX512DQ-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] |
| 2821 | +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero |
| 2822 | +; AVX512DQ-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] |
| 2823 | +; AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 |
| 2824 | +; AVX512DQ-NEXT: retq |
| 2825 | +; |
2364 | 2826 | ; AVX512VL-LABEL: var_shuffle_v4f64_from_v2f64:
|
2365 | 2827 | ; AVX512VL: # %bb.0:
|
2366 | 2828 | ; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
|
0 commit comments