Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -5890,45 +5890,48 @@
 multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
                               X86MemOperand MemOp, X86MemOperand MemYOp,
                               OpndItins SSEItins, OpndItins AVXItins,
-                              OpndItins AVX2Itins> {
+                              OpndItins AVX2Itins, Predicate prd> {
   defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, SSEItins>;
-  let Predicates = [HasAVX, NoVLX] in
+  let Predicates = [HasAVX, prd] in
     defm V#NAME   : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
                                      VR128, VR128, AVXItins>, VEX;
-  let Predicates = [HasAVX2, NoVLX] in
+  let Predicates = [HasAVX2, prd] in
     defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
                                      VR256, VR128, AVX2Itins>, VEX, VEX_L;
 }

-multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr,
-                          X86MemOperand MemOp, X86MemOperand MemYOp> {
+multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
+                          X86MemOperand MemYOp, Predicate prd> {
   defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr),
                                         MemOp, MemYOp,
                                         SSE_INTALU_ITINS_SHUFF_P,
                                         DEFAULT_ITINS_SHUFFLESCHED,
-                                        DEFAULT_ITINS_SHUFFLESCHED>;
+                                        DEFAULT_ITINS_SHUFFLESCHED, prd>;
   defm PMOVZX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovzx", OpcodeStr),
                                         MemOp, MemYOp,
                                         SSE_INTALU_ITINS_SHUFF_P,
                                         DEFAULT_ITINS_SHUFFLESCHED,
-                                        DEFAULT_ITINS_SHUFFLESCHED>;
+                                        DEFAULT_ITINS_SHUFFLESCHED, prd>;
 }

-defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem>;
-defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem>;
-defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem>;
+defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>;
+defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem, NoVLX>;
+defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem, NoVLX>;

-defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem>;
-defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem>;
+defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem, NoVLX>;
+defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>;

-defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem>;
+defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;

 // AVX2 Patterns
 multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
                                      SDNode ExtOp> {
   // Register-Register patterns
+  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
   def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
             (!cast<Instruction>(OpcPrefix#BWYrr) VR128:$src)>;
+  }
+  let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v8i32 (ExtOp (v16i8 VR128:$src))),
             (!cast<Instruction>(OpcPrefix#BDYrr) VR128:$src)>;
   def : Pat<(v4i64 (ExtOp (v16i8 VR128:$src))),
@@ -5941,10 +5944,13 @@
   def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
             (!cast<Instruction>(OpcPrefix#DQYrr) VR128:$src)>;
-
+  }

   // On AVX2, we also support 256bit inputs.
+  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
   def : Pat<(v16i16 (ExtOp (v32i8 VR256:$src))),
             (!cast<Instruction>(OpcPrefix#BWYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+  }
+  let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v8i32 (ExtOp (v32i8 VR256:$src))),
             (!cast<Instruction>(OpcPrefix#BDYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
   def : Pat<(v4i64 (ExtOp (v32i8 VR256:$src))),
@@ -5957,10 +5963,14 @@
   def : Pat<(v4i64 (ExtOp (v8i32 VR256:$src))),
             (!cast<Instruction>(OpcPrefix#DQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+  }

   // Simple Register-Memory patterns
+  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
   def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
             (!cast<Instruction>(OpcPrefix#BWYrm) addr:$src)>;
+  }
+  let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
             (!cast<Instruction>(OpcPrefix#BDYrm) addr:$src)>;
   def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
@@ -5973,8 +5983,10 @@
   def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
             (!cast<Instruction>(OpcPrefix#DQYrm) addr:$src)>;
-
+  }
+
   // AVX2 Register-Memory patterns
+  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
   def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
             (!cast<Instruction>(OpcPrefix#BWYrm) addr:$src)>;
   def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
@@ -5983,7 +5995,8 @@
             (!cast<Instruction>(OpcPrefix#BWYrm) addr:$src)>;
   def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
             (!cast<Instruction>(OpcPrefix#BWYrm) addr:$src)>;
-
+  }
+  let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<Instruction>(OpcPrefix#BDYrm) addr:$src)>;
   def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
@@ -6028,18 +6041,20 @@
             (!cast<Instruction>(OpcPrefix#DQYrm) addr:$src)>;
   def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
             (!cast<Instruction>(OpcPrefix#DQYrm) addr:$src)>;
+  }
 }

-let Predicates = [HasAVX2, NoVLX] in {
-  defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>;
-  defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>;
-}
+defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>;
+defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>;

 // SSE4.1/AVX patterns.
 multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
                                 SDNode ExtOp, PatFrag ExtLoad16> {
+  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
   def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
             (!cast<Instruction>(OpcPrefix#BWrr) VR128:$src)>;
+  }
+  let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
             (!cast<Instruction>(OpcPrefix#BDrr) VR128:$src)>;
   def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),
@@ -6052,9 +6067,12 @@
   def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
             (!cast<Instruction>(OpcPrefix#DQrr) VR128:$src)>;
-
+  }
+  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
   def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
             (!cast<Instruction>(OpcPrefix#BWrm) addr:$src)>;
+  }
+  let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
             (!cast<Instruction>(OpcPrefix#BDrm) addr:$src)>;
   def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
@@ -6067,7 +6085,8 @@
   def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
             (!cast<Instruction>(OpcPrefix#DQrm) addr:$src)>;
-
+  }
+  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
   def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<Instruction>(OpcPrefix#BWrm) addr:$src)>;
   def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<Instruction>(OpcPrefix#BWrm) addr:$src)>;
@@ -6078,7 +6097,8 @@
             (!cast<Instruction>(OpcPrefix#BWrm) addr:$src)>;
   def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
             (!cast<Instruction>(OpcPrefix#BWrm) addr:$src)>;
-
+  }
+  let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<Instruction>(OpcPrefix#BDrm) addr:$src)>;
   def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
@@ -6127,12 +6147,11 @@
             (!cast<Instruction>(OpcPrefix#DQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
             (!cast<Instruction>(OpcPrefix#DQrm) addr:$src)>;
+  }
 }

-let Predicates = [HasAVX, NoVLX] in {
-  defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
-  defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
-}
+defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
+defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;

 let Predicates = [UseSSE41] in {
   defm : SS41I_pmovx_patterns<"PMOVSX", "s", X86vsext, extloadi32i16>;
Index: test/CodeGen/X86/avx512-mask-op.ll
===================================================================
--- test/CodeGen/X86/avx512-mask-op.ll
+++ test/CodeGen/X86/avx512-mask-op.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL
 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

 define i16 @mask16(i16 %x) {
@@ -23,6 +24,13 @@
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: mask8:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    kmovw %edi, %k0
+; AVX512VL-NEXT:    knotw %k0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: mask8:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    kmovb %edi, %k0
@@ -58,6 +66,13 @@
 ; KNL-NEXT:    kmovw %k0, (%rdi)
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: mask8_mem:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    kmovw (%rdi), %k0
+; AVX512VL-NEXT:    knotw %k0, %k0
+; AVX512VL-NEXT:    kmovw %k0, (%rdi)
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: mask8_mem:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    kmovb (%rdi), %k0
@@ -99,6 +114,13 @@
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: shuf_test1:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    kmovw %edi, %k0
+; AVX512VL-NEXT:    kshiftrw $8, %k0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: shuf_test1:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    kmovw %edi, %k0
@@ -146,6 +168,15 @@
 ; KNL-NEXT:    movb $-2, %al
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: conv1:
+; AVX512VL:       ## BB#0: ## %entry
+; AVX512VL-NEXT:    kxnorw %k0, %k0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    movb %al, (%rdi)
+; AVX512VL-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT:    movb $-2, %al
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: conv1:
 ; SKX:       ## BB#0: ## %entry
 ; SKX-NEXT:    kxnorw %k0, %k0, %k0
@@ -177,6 +208,24 @@
 ; KNL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: test4:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0
+; AVX512VL-NEXT:    knotw %k0, %k1
+; AVX512VL-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT:    movb -{{[0-9]+}}(%rsp), %al
+; AVX512VL-NEXT:    xorl %ecx, %ecx
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    movl $-1, %eax
+; AVX512VL-NEXT:    cmovel %ecx, %eax
+; AVX512VL-NEXT:    vmovd %eax, %xmm0
+; AVX512VL-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: test4:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0
@@ -199,6 +248,22 @@
 ; KNL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: test5:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
+; AVX512VL-NEXT:    knotw %k0, %k1
+; AVX512VL-NEXT:    vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT:    movb -{{[0-9]+}}(%rsp), %al
+; AVX512VL-NEXT:    xorl %ecx, %ecx
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    movq $-1, %rax
+; AVX512VL-NEXT:    cmoveq %rcx, %rax
+; AVX512VL-NEXT:    vmovq %rax, %xmm0
+; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0,0]
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: test5:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
@@ -237,6 +302,18 @@
 ; KNL-NEXT:    testb %al, %al
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: test7:
+; AVX512VL:       ## BB#0: ## %allocas
+; AVX512VL-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512VL-NEXT:    vpsllq $63, %zmm0, %zmm0
+; AVX512VL-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512VL-NEXT:    movb $85, %al
+; AVX512VL-NEXT:    kmovw %eax, %k1
+; AVX512VL-NEXT:    korw %k1, %k0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: test7:
 ; SKX:       ## BB#0: ## %allocas
 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
@@ -275,6 +352,21 @@
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: test8:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vpxord %zmm2, %zmm2, %zmm2
+; AVX512VL-NEXT:    cmpl %esi, %edi
+; AVX512VL-NEXT:    jg LBB14_1
+; AVX512VL-NEXT:  ## BB#2:
+; AVX512VL-NEXT:    vpcmpltud %zmm2, %zmm1, %k1
+; AVX512VL-NEXT:    jmp LBB14_3
+; AVX512VL-NEXT:  LBB14_1:
+; AVX512VL-NEXT:    vpcmpgtd %zmm2, %zmm0, %k1
+; AVX512VL-NEXT:  LBB14_3:
+; AVX512VL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: test8:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    vpxord %zmm2, %zmm2, %zmm2
@@ -312,6 +404,22 @@
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: test9:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    cmpl %esi, %edi
+; AVX512VL-NEXT:    jg LBB15_1
+; AVX512VL-NEXT:  ## BB#2:
+; AVX512VL-NEXT:    vpmovsxbd %xmm1, %zmm0
+; AVX512VL-NEXT:    jmp LBB15_3
+; AVX512VL-NEXT:  LBB15_1:
+; AVX512VL-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512VL-NEXT:  LBB15_3:
+; AVX512VL-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512VL-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; AVX512VL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: test9:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    cmpl %esi, %edi
@@ -344,6 +452,30 @@
 ; KNL-NEXT:  LBB17_2:
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: test11:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    cmpl %esi, %edi
+; AVX512VL-NEXT:    jg LBB17_1
+; AVX512VL-NEXT:  ## BB#2:
+; AVX512VL-NEXT:    vpslld $31, %xmm1, %xmm0
+; AVX512VL-NEXT:    jmp LBB17_3
+; AVX512VL-NEXT:  LBB17_1:
+; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX512VL-NEXT:  LBB17_3:
+; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT:    movb -{{[0-9]+}}(%rsp), %al
+; AVX512VL-NEXT:    xorl %ecx, %ecx
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    movl $-1, %eax
+; AVX512VL-NEXT:    cmovel %ecx, %eax
+; AVX512VL-NEXT:    vmovd %eax, %xmm0
+; AVX512VL-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: test11:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    cmpl %esi, %edi
@@ -401,6 +533,17 @@
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: test15:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    cmpl %esi, %edi
+; AVX512VL-NEXT:    movw $21845, %ax ## imm = 0x5555
+; AVX512VL-NEXT:    movw $1, %cx
+; AVX512VL-NEXT:    cmovgw %ax, %cx
+; AVX512VL-NEXT:    kmovw %ecx, %k1
+; AVX512VL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: test15:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    cmpl %esi, %edi
@@ -857,6 +1000,445 @@
 ; KNL-NEXT:    popq %rbp
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: test16:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    pushq %rbp
+; AVX512VL-NEXT:  Ltmp0:
+; AVX512VL-NEXT:    .cfi_def_cfa_offset 16
+; AVX512VL-NEXT:  Ltmp1:
+; AVX512VL-NEXT:    .cfi_offset %rbp, -16
+; AVX512VL-NEXT:    movq %rsp, %rbp
+; AVX512VL-NEXT:  Ltmp2:
+; AVX512VL-NEXT:    .cfi_def_cfa_register %rbp
+; AVX512VL-NEXT:    pushq %r15
+; AVX512VL-NEXT:    pushq %r14
+; AVX512VL-NEXT:    pushq %r13
+; AVX512VL-NEXT:    pushq %r12
+; AVX512VL-NEXT:    pushq %rbx
+; AVX512VL-NEXT:    andq $-32, %rsp
+; AVX512VL-NEXT:    subq $128, %rsp
+; AVX512VL-NEXT:  Ltmp3:
+; AVX512VL-NEXT:    .cfi_offset %rbx, -56
+; AVX512VL-NEXT:  Ltmp4:
+; AVX512VL-NEXT:    .cfi_offset %r12, -48
+; AVX512VL-NEXT:  Ltmp5:
+; AVX512VL-NEXT:    .cfi_offset %r13, -40
+; AVX512VL-NEXT:  Ltmp6:
+; AVX512VL-NEXT:    .cfi_offset %r14, -32
+; AVX512VL-NEXT:  Ltmp7:
+; AVX512VL-NEXT:    .cfi_offset %r15, -24
+; AVX512VL-NEXT:    movq %rdi, %rax
+; AVX512VL-NEXT:    shrq $32, %rax
+; AVX512VL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
+; AVX512VL-NEXT:    movl $271, %eax ## imm = 0x10F
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    movl %edi, %ecx
+; AVX512VL-NEXT:    andl $1, %ecx
+; AVX512VL-NEXT:    vmovd %ecx, %xmm0
+; AVX512VL-NEXT:    movl $257, %ecx ## imm = 0x101
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $258, %ecx ## imm = 0x102
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $259, %ecx ## imm = 0x103
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    vpinsrb $3, %ecx, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $260, %ecx ## imm = 0x104
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $261, %ecx ## imm = 0x105
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $262, %ecx ## imm = 0x106
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $263, %ecx ## imm = 0x107
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    vpinsrb $7, %ecx, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $264, %ecx ## imm = 0x108
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $265, %ecx ## imm = 0x109
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $266, %ecx ## imm = 0x10A
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $267, %ecx ## imm = 0x10B
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $268, %ecx ## imm = 0x10C
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $269, %ecx ## imm = 0x10D
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $270, %ecx ## imm = 0x10E
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm1
+; AVX512VL-NEXT:    movl $1, %eax
+; AVX512VL-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm0
+; AVX512VL-NEXT:    movl {{[0-9]+}}(%rsp), %r15d
+; AVX512VL-NEXT:    movq %r15, %rdx
+; AVX512VL-NEXT:    shrq $17, %rdx
+; AVX512VL-NEXT:    andb $1, %dl
+; AVX512VL-NEXT:    je LBB22_2
+; AVX512VL-NEXT:  ## BB#1:
+; AVX512VL-NEXT:    movb $-1, %dl
+; AVX512VL-NEXT:  LBB22_2:
+; AVX512VL-NEXT:    movq %r15, %r11
+; AVX512VL-NEXT:    shrq $16, %r11
+; AVX512VL-NEXT:    andb $1, %r11b
+; AVX512VL-NEXT:    je LBB22_4
+; AVX512VL-NEXT:  ## BB#3:
+; AVX512VL-NEXT:    movb $-1, %r11b
+; AVX512VL-NEXT:  LBB22_4:
+; AVX512VL-NEXT:    movq %r15, %r10
+; AVX512VL-NEXT:    shrq $18, %r10
+; AVX512VL-NEXT:    andb $1, %r10b
+; AVX512VL-NEXT:    je LBB22_6
+; AVX512VL-NEXT:  ## BB#5:
+; AVX512VL-NEXT:    movb $-1, %r10b
+; AVX512VL-NEXT:  LBB22_6:
+; AVX512VL-NEXT:    movq %r15, %r9
+; AVX512VL-NEXT:    shrq $19, %r9
+; AVX512VL-NEXT:    andb $1, %r9b
+; AVX512VL-NEXT:    je LBB22_8
+; AVX512VL-NEXT:  ## BB#7:
+; AVX512VL-NEXT:    movb $-1, %r9b
+; AVX512VL-NEXT:  LBB22_8:
+; AVX512VL-NEXT:    movq %r15, %rbx
+; AVX512VL-NEXT:    shrq $20, %rbx
+; AVX512VL-NEXT:    andb $1, %bl
+; AVX512VL-NEXT:    je LBB22_10
+; AVX512VL-NEXT:  ## BB#9:
+; AVX512VL-NEXT:    movb $-1, %bl
+; AVX512VL-NEXT:  LBB22_10:
+; AVX512VL-NEXT:    movq %r15, %r12
+; AVX512VL-NEXT:    shrq $21, %r12
+; AVX512VL-NEXT:    andb $1, %r12b
+; AVX512VL-NEXT:    je LBB22_12
+; AVX512VL-NEXT:  ## BB#11:
+; AVX512VL-NEXT:    movb $-1, %r12b
+; AVX512VL-NEXT:  LBB22_12:
+; AVX512VL-NEXT:    movq %r15, %r14
+; AVX512VL-NEXT:    shrq $22, %r14
+; AVX512VL-NEXT:    andb $1, %r14b
+; AVX512VL-NEXT:    je LBB22_14
+; AVX512VL-NEXT:  ## BB#13:
+; AVX512VL-NEXT:    movb $-1, %r14b
+; AVX512VL-NEXT:  LBB22_14:
+; AVX512VL-NEXT:    movq %r15, %r8
+; AVX512VL-NEXT:    shrq $23, %r8
+; AVX512VL-NEXT:    andb $1, %r8b
+; AVX512VL-NEXT:    je LBB22_16
+; AVX512VL-NEXT:  ## BB#15:
+; AVX512VL-NEXT:    movb $-1, %r8b
+; AVX512VL-NEXT:  LBB22_16:
+; AVX512VL-NEXT:    movq %r15, %r13
+; AVX512VL-NEXT:    shrq $24, %r13
+; AVX512VL-NEXT:    andb $1, %r13b
+; AVX512VL-NEXT:    je LBB22_18
+; AVX512VL-NEXT:  ## BB#17:
+; AVX512VL-NEXT:    movb $-1, %r13b
+; AVX512VL-NEXT:  LBB22_18:
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $25, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_20
+; AVX512VL-NEXT:  ## BB#19:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_20:
+; AVX512VL-NEXT:    movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $26, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_22
+; AVX512VL-NEXT:  ## BB#21:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_22:
+; AVX512VL-NEXT:    movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; AVX512VL-NEXT:    movl $272, %esi ## imm = 0x110
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $27, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_24
+; AVX512VL-NEXT:  ## BB#23:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_24:
+; AVX512VL-NEXT:    movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; AVX512VL-NEXT:    movl $273, %eax ## imm = 0x111
+; AVX512VL-NEXT:    bextrl %esi, %edi, %esi
+; AVX512VL-NEXT:    movq %r15, %rcx
+; AVX512VL-NEXT:    shrq $28, %rcx
+; AVX512VL-NEXT:    andb $1, %cl
+; AVX512VL-NEXT:    je LBB22_26
+; AVX512VL-NEXT:  ## BB#25:
+; AVX512VL-NEXT:    movb $-1, %cl
+; AVX512VL-NEXT:  LBB22_26:
+; AVX512VL-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vmovd %esi, %xmm2
+; AVX512VL-NEXT:    movl $274, %esi ## imm = 0x112
+; AVX512VL-NEXT:    movq %r15, %rcx
+; AVX512VL-NEXT:    shrq $29, %rcx
+; AVX512VL-NEXT:    andb $1, %cl
+; AVX512VL-NEXT:    je LBB22_28
+; AVX512VL-NEXT:  ## BB#27:
+; AVX512VL-NEXT:    movb $-1, %cl
+; AVX512VL-NEXT:  LBB22_28:
+; AVX512VL-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; AVX512VL-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %esi, %edi, %eax
+; AVX512VL-NEXT:    movzbl %r11b, %esi
+; AVX512VL-NEXT:    movq %r15, %rcx
+; AVX512VL-NEXT:    shrq $30, %rcx
+; AVX512VL-NEXT:    andb $1, %cl
+; AVX512VL-NEXT:    je LBB22_30
+; AVX512VL-NEXT:  ## BB#29:
+; AVX512VL-NEXT:    movb $-1, %cl
+; AVX512VL-NEXT:  LBB22_30:
+; AVX512VL-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX512VL-NEXT:    movl $275, %eax ## imm = 0x113
+; AVX512VL-NEXT:    bextrl %eax, %edi, %r11d
+; AVX512VL-NEXT:    movzbl %dl, %edx
+; AVX512VL-NEXT:    vmovd %esi, %xmm3
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $31, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_32
+; AVX512VL-NEXT:  ## BB#31:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_32:
+; AVX512VL-NEXT:    movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; AVX512VL-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; AVX512VL-NEXT:    vpinsrb $3, %r11d, %xmm2, %xmm2
+; AVX512VL-NEXT:    movl $276, %eax ## imm = 0x114
+; AVX512VL-NEXT:    bextrl %eax, %edi, %esi
+; AVX512VL-NEXT:    movl $277, %r11d ## imm = 0x115
+; AVX512VL-NEXT:    vpinsrb $1, %edx, %xmm3, %xmm3
+; AVX512VL-NEXT:    movzbl %r10b, %r10d
+; AVX512VL-NEXT:    movb %r15b, %al
+; AVX512VL-NEXT:    shrb %al
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_34
+; AVX512VL-NEXT:  ## BB#33:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_34:
+; AVX512VL-NEXT:    vpinsrb $4, %esi, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %r11d, %edi, %edx
+; AVX512VL-NEXT:    movl $278, %r11d ## imm = 0x116
+; AVX512VL-NEXT:    vpinsrb $2, %r10d, %xmm3, %xmm3
+; AVX512VL-NEXT:    movzbl %r9b, %esi
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    movq %r15, %rcx
+; AVX512VL-NEXT:    shlq $63, %rcx
+; AVX512VL-NEXT:    sarq $63, %rcx
+; AVX512VL-NEXT:    vmovd %ecx, %xmm4
+; AVX512VL-NEXT:    vpinsrb $1, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movb %r15b, %al
+; AVX512VL-NEXT:    shrb $2, %al
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_36
+; AVX512VL-NEXT:  ## BB#35:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_36:
+; AVX512VL-NEXT:    vpinsrb $5, %edx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %r11d, %edi, %edx
+; AVX512VL-NEXT:    movl $279, %r9d ## imm = 0x117
+; AVX512VL-NEXT:    vpinsrb $3, %esi, %xmm3, %xmm3
+; AVX512VL-NEXT:    movzbl %bl, %ebx
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $2, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movb %r15b, %al
+; AVX512VL-NEXT:    shrb $3, %al
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_38
+; AVX512VL-NEXT:  ## BB#37:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_38:
+; AVX512VL-NEXT:    vpinsrb $6, %edx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %r9d, %edi, %edx
+; AVX512VL-NEXT:    movl $280, %esi ## imm = 0x118
+; AVX512VL-NEXT:    vpinsrb $4, %ebx, %xmm3, %xmm3
+; AVX512VL-NEXT:    movzbl %r12b, %ebx
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $3, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movb %r15b, %al
+; AVX512VL-NEXT:    shrb $4, %al
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_40
+; AVX512VL-NEXT:  ## BB#39:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_40:
+; AVX512VL-NEXT:    vpinsrb $7, %edx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %esi, %edi, %ecx
+; AVX512VL-NEXT:    movl $281, %edx ## imm = 0x119
+; AVX512VL-NEXT:    vpinsrb $5, %ebx, %xmm3, %xmm3
+; AVX512VL-NEXT:    movzbl %r14b, %esi
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $4, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movb %r15b, %al
+; AVX512VL-NEXT:    shrb $5, %al
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_42
+; AVX512VL-NEXT:  ## BB#41:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_42:
+; AVX512VL-NEXT:    vpinsrb $8, %ecx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %edx, %edi, %ecx
+; AVX512VL-NEXT:    movl $282, %edx ## imm = 0x11A
+; AVX512VL-NEXT:    vpinsrb $6, %esi, %xmm3, %xmm3
+; AVX512VL-NEXT:    movzbl %r8b, %esi
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $5, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movb %r15b, %bl
+; AVX512VL-NEXT:    shrb $6, %bl
+; AVX512VL-NEXT:    andb $1, %bl
+; AVX512VL-NEXT:    je LBB22_44
+; AVX512VL-NEXT:  ## BB#43:
+; AVX512VL-NEXT:    movb $-1, %bl
+; AVX512VL-NEXT:  LBB22_44:
+; AVX512VL-NEXT:    vpinsrb $9, %ecx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %edx, %edi, %eax
+; AVX512VL-NEXT:    movl $283, %ecx ## imm = 0x11B
+; AVX512VL-NEXT:    vpinsrb $7, %esi, %xmm3, %xmm3
+; AVX512VL-NEXT:    movzbl %r13b, %esi
+; AVX512VL-NEXT:    movzbl %bl, %edx
+; AVX512VL-NEXT:    vpinsrb $6, %edx, %xmm4, %xmm4
+; AVX512VL-NEXT:    movb %r15b, %bl
+; AVX512VL-NEXT:    shrb $7, %bl
+; AVX512VL-NEXT:    je LBB22_46
+; AVX512VL-NEXT:  ## BB#45:
+; AVX512VL-NEXT:    movb $-1, %bl
+; AVX512VL-NEXT:  LBB22_46:
+; AVX512VL-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    movl $284, %edx ## imm = 0x11C
+; AVX512VL-NEXT:    vpinsrb $8, %esi, %xmm3, %xmm3
+; AVX512VL-NEXT:    movq {{[0-9]+}}(%rsp), %rax ## 8-byte Reload
+; AVX512VL-NEXT:    movzbl %al, %esi
+; AVX512VL-NEXT:    movzbl %bl, %eax
+; AVX512VL-NEXT:    vpinsrb $7, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $8, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_48
+; AVX512VL-NEXT:  ## BB#47:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_48:
+; AVX512VL-NEXT:    vpinsrb $11, %ecx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %edx, %edi, %ecx
+; AVX512VL-NEXT:    movl $285, %edx ## imm = 0x11D
+; AVX512VL-NEXT:    vpinsrb $9, %esi, %xmm3, %xmm3
+; AVX512VL-NEXT:    movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
+; AVX512VL-NEXT:    movzbl %sil, %esi
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $8, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $9, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_50
+; AVX512VL-NEXT:  ## BB#49:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_50:
+; AVX512VL-NEXT:    vpinsrb $12, %ecx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %edx, %edi, %ecx
+; AVX512VL-NEXT:    movl $286, %edx ## imm = 0x11E
+; AVX512VL-NEXT:    vpinsrb $10, %esi, %xmm3, %xmm3
+; AVX512VL-NEXT:    movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
+; AVX512VL-NEXT:    movzbl %sil, %esi
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $9, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $10, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_52
+; AVX512VL-NEXT:  ## BB#51:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_52:
+; AVX512VL-NEXT:    vpinsrb $13, %ecx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %edx, %edi, %edx
+; AVX512VL-NEXT:    vpinsrb $11, %esi, %xmm3, %xmm3
+; AVX512VL-NEXT:    movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; AVX512VL-NEXT:    movzbl %cl, %ecx
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $10, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $11, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_54
+; AVX512VL-NEXT:  ## BB#53:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_54:
+; AVX512VL-NEXT:    vpinsrb $14, %edx, %xmm2, %xmm2
+; AVX512VL-NEXT:    shrl $31, %edi
+; AVX512VL-NEXT:    vpinsrb $12, %ecx, %xmm3, %xmm3
+; AVX512VL-NEXT:    movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; AVX512VL-NEXT:    movzbl %cl, %ecx
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $11, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $12, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_56
+; AVX512VL-NEXT:  ## BB#55:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_56:
+; AVX512VL-NEXT:    vpinsrb $15, %edi, %xmm2, %xmm2
+; AVX512VL-NEXT:    vpinsrb $13, %ecx, %xmm3, %xmm3
+; AVX512VL-NEXT:    movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; AVX512VL-NEXT:    movzbl %cl, %ecx
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $12, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $13, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_58
+; AVX512VL-NEXT:  ## BB#57:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_58:
+; AVX512VL-NEXT:    vinserti32x4 $1, %xmm2, %ymm1, %ymm1
+; AVX512VL-NEXT:    vpinsrb $14, %ecx, %xmm3, %xmm2
+; AVX512VL-NEXT:    movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; AVX512VL-NEXT:    movzbl %cl, %ecx
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $13, %eax, %xmm4, %xmm3
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $14, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB22_60
+; AVX512VL-NEXT:  ## BB#59:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB22_60:
+; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512VL-NEXT:    vpinsrb $15, %ecx, %xmm2, %xmm1
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm2
+; AVX512VL-NEXT:    shrq $15, %r15
+; AVX512VL-NEXT:    andb $1, %r15b
+; AVX512VL-NEXT:    je LBB22_62
+; AVX512VL-NEXT:  ## BB#61:
+; AVX512VL-NEXT:    movb $-1, %r15b
+; AVX512VL-NEXT:  LBB22_62:
+; AVX512VL-NEXT:    movzbl %r15b, %eax
+; AVX512VL-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm2
+; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm2, %ymm1
+; AVX512VL-NEXT:    vpsllw $7, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX512VL-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
+; AVX512VL-NEXT:    leaq -40(%rbp), %rsp
+; AVX512VL-NEXT:    popq %rbx
+; AVX512VL-NEXT:    popq %r12
+; AVX512VL-NEXT:    popq %r13
+; AVX512VL-NEXT:    popq %r14
+; AVX512VL-NEXT:    popq %r15
+; AVX512VL-NEXT:    popq %rbp
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: test16:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    kmovq %rdi, %k0
@@ -1314,6 +1896,447 @@
 ; KNL-NEXT:    popq %rbp
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: test17:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    pushq %rbp
+; AVX512VL-NEXT:  Ltmp8:
+; AVX512VL-NEXT:    .cfi_def_cfa_offset 16
+; AVX512VL-NEXT:  Ltmp9:
+; AVX512VL-NEXT:    .cfi_offset %rbp, -16
+; AVX512VL-NEXT:    movq %rsp, %rbp
+; AVX512VL-NEXT:  Ltmp10:
+; AVX512VL-NEXT:    .cfi_def_cfa_register %rbp
+; AVX512VL-NEXT:    pushq %r15
+; AVX512VL-NEXT:    pushq %r14
+; AVX512VL-NEXT:    pushq %r13
+; AVX512VL-NEXT:    pushq %r12
+; AVX512VL-NEXT:    pushq %rbx
+; AVX512VL-NEXT:    andq $-32, %rsp
+; AVX512VL-NEXT:    subq $128, %rsp
+; AVX512VL-NEXT:  Ltmp11:
+; AVX512VL-NEXT:    .cfi_offset %rbx, -56
+; AVX512VL-NEXT:  Ltmp12:
+; AVX512VL-NEXT:    .cfi_offset %r12, -48
+; AVX512VL-NEXT:  Ltmp13:
+; AVX512VL-NEXT:    .cfi_offset %r13, -40
+; AVX512VL-NEXT:  Ltmp14:
+; AVX512VL-NEXT:    .cfi_offset %r14, -32
+; AVX512VL-NEXT:  Ltmp15:
+; AVX512VL-NEXT:    .cfi_offset %r15, -24
+; AVX512VL-NEXT:    movq %rdi, %rax
+; AVX512VL-NEXT:    shrq $32, %rax
+; AVX512VL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
+; AVX512VL-NEXT:    movl %edi, %eax
+; AVX512VL-NEXT:    andl $1, %eax
+; AVX512VL-NEXT:    vmovd %eax, %xmm0
+; AVX512VL-NEXT:    movl $257, %eax ## imm = 0x101
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $258, %eax ## imm = 0x102
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $259, %eax ## imm = 0x103
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $260, %eax ## imm = 0x104
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $261, %eax ## imm = 0x105
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $262, %eax ## imm = 0x106
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $263, %eax ## imm = 0x107
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $264, %eax ## imm = 0x108
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $265, %eax ## imm = 0x109
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $266, %eax ## imm = 0x10A
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $267, %eax ## imm = 0x10B
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $268, %eax ## imm = 0x10C
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $269, %eax ## imm = 0x10D
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $270, %eax ## imm = 0x10E
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    movl $271, %eax ## imm = 0x10F
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm1
+; AVX512VL-NEXT:    cmpl %edx, %esi
+; AVX512VL-NEXT:    setg %al
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm0
+; AVX512VL-NEXT:    movl {{[0-9]+}}(%rsp), %r15d
+; AVX512VL-NEXT:    movq %r15, %rdx
+; AVX512VL-NEXT:    shrq $17, %rdx
+; AVX512VL-NEXT:    andb $1, %dl
+; AVX512VL-NEXT:    je LBB23_2
+; AVX512VL-NEXT:  ## BB#1:
+; AVX512VL-NEXT:    movb $-1, %dl
+; AVX512VL-NEXT:  LBB23_2:
+; AVX512VL-NEXT:    movq %r15, %r11
+; AVX512VL-NEXT:    shrq $16, %r11
+; AVX512VL-NEXT:    andb $1, %r11b
+; AVX512VL-NEXT:    je LBB23_4
+; AVX512VL-NEXT:  ## BB#3:
+; AVX512VL-NEXT:    movb $-1, %r11b
+; AVX512VL-NEXT:  LBB23_4:
+; AVX512VL-NEXT:    movq %r15, %r10
+; AVX512VL-NEXT:    shrq $18, %r10
+; AVX512VL-NEXT:    andb $1, %r10b
+; AVX512VL-NEXT:    je LBB23_6
+; AVX512VL-NEXT:  ## BB#5:
+; AVX512VL-NEXT:    movb $-1, %r10b
+; AVX512VL-NEXT:  LBB23_6:
+; AVX512VL-NEXT:    movq %r15, %r9
+; AVX512VL-NEXT:    shrq $19, %r9
+; AVX512VL-NEXT:    andb $1, %r9b
+; AVX512VL-NEXT:    je LBB23_8
+; AVX512VL-NEXT:  ## BB#7:
+; AVX512VL-NEXT:    movb $-1, %r9b
+; AVX512VL-NEXT:  LBB23_8:
+; AVX512VL-NEXT:    movq %r15, %rbx
+; AVX512VL-NEXT:    shrq $20, %rbx
+; AVX512VL-NEXT:    andb $1, %bl
+; AVX512VL-NEXT:    je LBB23_10
+; AVX512VL-NEXT:  ## BB#9:
+; AVX512VL-NEXT:    movb $-1, %bl
+; AVX512VL-NEXT:  LBB23_10:
+; AVX512VL-NEXT:    movq %r15, %r12
+; AVX512VL-NEXT:    shrq $21, %r12
+; AVX512VL-NEXT:    andb $1, %r12b
+; AVX512VL-NEXT:    je LBB23_12
+; AVX512VL-NEXT:  ## BB#11:
+; AVX512VL-NEXT:    movb $-1, %r12b
+; AVX512VL-NEXT:  LBB23_12:
+; AVX512VL-NEXT:    movq %r15, %r14
+; AVX512VL-NEXT:    shrq $22, %r14
+; AVX512VL-NEXT:    andb $1, %r14b
+; AVX512VL-NEXT:    je LBB23_14
+; AVX512VL-NEXT:  ## BB#13:
+; AVX512VL-NEXT:    movb $-1, %r14b
+; AVX512VL-NEXT:  LBB23_14:
+; AVX512VL-NEXT:    movq %r15, %r8
+; AVX512VL-NEXT:    shrq $23, %r8
+; AVX512VL-NEXT:    andb $1, %r8b
+; AVX512VL-NEXT:    je LBB23_16
+; AVX512VL-NEXT:  ## BB#15:
+; AVX512VL-NEXT:    movb $-1, %r8b
+; AVX512VL-NEXT:  LBB23_16:
+; AVX512VL-NEXT:    movq %r15, %r13
+; AVX512VL-NEXT:    shrq $24, %r13
+; AVX512VL-NEXT:    andb $1, %r13b
+; AVX512VL-NEXT:    je LBB23_18
+; AVX512VL-NEXT:  ## BB#17:
+; AVX512VL-NEXT:    movb $-1, %r13b
+; AVX512VL-NEXT:  LBB23_18:
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $25, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_20
+; AVX512VL-NEXT:  ## BB#19:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_20:
+; AVX512VL-NEXT:    movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $26, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_22
+; AVX512VL-NEXT:  ## BB#21:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_22:
+; AVX512VL-NEXT:    movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; AVX512VL-NEXT:    movl $272, %esi ## imm = 0x110
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $27, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_24
+; AVX512VL-NEXT:  ## BB#23:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_24:
+; AVX512VL-NEXT:    movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; AVX512VL-NEXT:    movl $273, %eax ## imm = 0x111
+; AVX512VL-NEXT:    bextrl %esi, %edi, %esi
+; AVX512VL-NEXT:    movq %r15, %rcx
+; AVX512VL-NEXT:    shrq $28, %rcx
+; AVX512VL-NEXT:    andb $1, %cl
+; AVX512VL-NEXT:    je LBB23_26
+; AVX512VL-NEXT:  ## BB#25:
+; AVX512VL-NEXT:    movb $-1, %cl
+; AVX512VL-NEXT:  LBB23_26:
+; AVX512VL-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; AVX512VL-NEXT:    bextrl %eax, %edi, %eax
+; AVX512VL-NEXT:    vmovd %esi, %xmm2
+; AVX512VL-NEXT:    movl $274, %esi ## imm = 0x112
+; AVX512VL-NEXT:    movq %r15, %rcx
+; AVX512VL-NEXT:    shrq $29, %rcx
+; AVX512VL-NEXT:    andb $1, %cl
+; AVX512VL-NEXT:    je LBB23_28
+; AVX512VL-NEXT:  ## BB#27:
+; AVX512VL-NEXT:    movb $-1, %cl
+; AVX512VL-NEXT:  LBB23_28:
+; AVX512VL-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; AVX512VL-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %esi, %edi, %eax
+; AVX512VL-NEXT:    movzbl %r11b, %esi
+; AVX512VL-NEXT:    movq %r15, %rcx
+; AVX512VL-NEXT:    shrq $30, %rcx
+; AVX512VL-NEXT:    andb $1, %cl
+; AVX512VL-NEXT:    je LBB23_30
+; AVX512VL-NEXT:  ## BB#29:
+; AVX512VL-NEXT:    movb $-1, %cl
+; AVX512VL-NEXT:  LBB23_30:
+; AVX512VL-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX512VL-NEXT:    movl $275, %eax ## imm = 0x113
+; AVX512VL-NEXT:    bextrl %eax, %edi, %r11d
+; AVX512VL-NEXT:    movzbl %dl, %edx
+; AVX512VL-NEXT:    vmovd %esi, %xmm3
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $31, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_32
+; AVX512VL-NEXT:  ## BB#31:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_32:
+; AVX512VL-NEXT:    movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; AVX512VL-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; AVX512VL-NEXT:    vpinsrb $3, %r11d, %xmm2, %xmm2
+; AVX512VL-NEXT:    movl $276, %eax ## imm = 0x114
+; AVX512VL-NEXT:    bextrl %eax, %edi, %esi
+; AVX512VL-NEXT:    movl $277, %r11d ## imm = 0x115
+; AVX512VL-NEXT:    vpinsrb $1, %edx, %xmm3, %xmm3
+; AVX512VL-NEXT:    movzbl %r10b, %r10d
+; AVX512VL-NEXT:    movb %r15b, %al
+; AVX512VL-NEXT:    shrb %al
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_34
+; AVX512VL-NEXT:  ## BB#33:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_34:
+; AVX512VL-NEXT:    vpinsrb $4, %esi, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %r11d, %edi, %edx
+; AVX512VL-NEXT:    movl $278, %r11d ## imm = 0x116
+; AVX512VL-NEXT:    vpinsrb $2, %r10d, %xmm3, %xmm3
+; AVX512VL-NEXT:    movzbl %r9b, %esi
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    movq %r15, %rcx
+; AVX512VL-NEXT:    shlq $63, %rcx
+; AVX512VL-NEXT:    sarq $63, %rcx
+; AVX512VL-NEXT:    vmovd %ecx, %xmm4
+; AVX512VL-NEXT:    vpinsrb $1, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movb %r15b, %al
+; AVX512VL-NEXT:    shrb $2, %al
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_36
+; AVX512VL-NEXT:  ## BB#35:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_36:
+; AVX512VL-NEXT:    vpinsrb $5, %edx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %r11d, %edi, %edx
+; AVX512VL-NEXT:    movl $279, %r9d ## imm = 0x117
+; AVX512VL-NEXT:    vpinsrb $3, %esi, %xmm3, %xmm3
+; AVX512VL-NEXT:    movzbl %bl, %ebx
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $2, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movb %r15b, %al
+; AVX512VL-NEXT:    shrb $3, %al
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_38
+; AVX512VL-NEXT:  ## BB#37:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_38:
+; AVX512VL-NEXT:    vpinsrb $6, %edx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %r9d, %edi, %edx
+; AVX512VL-NEXT:    movl $280, %esi ## imm = 0x118
+; AVX512VL-NEXT:    vpinsrb $4, %ebx, %xmm3, %xmm3
+; AVX512VL-NEXT:    movzbl %r12b, %ebx
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $3, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movb %r15b, %al
+; AVX512VL-NEXT:    shrb $4, %al
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_40
+; AVX512VL-NEXT:  ## BB#39:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_40:
+; AVX512VL-NEXT:    vpinsrb $7, %edx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %esi, %edi, %ecx
+; AVX512VL-NEXT:    movl $281, %edx ## imm = 0x119
+; AVX512VL-NEXT:    vpinsrb $5, %ebx, %xmm3, %xmm3
+; AVX512VL-NEXT:    movzbl %r14b, %esi
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $4, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movb %r15b, %al
+; AVX512VL-NEXT:    shrb $5, %al
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_42
+; AVX512VL-NEXT:  ## BB#41:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_42:
+; AVX512VL-NEXT:    vpinsrb $8, %ecx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %edx, %edi, %ecx
+; AVX512VL-NEXT:    movl $282, %edx ## imm = 0x11A
+; AVX512VL-NEXT:    vpinsrb $6, %esi, %xmm3, %xmm3
+; AVX512VL-NEXT:    movzbl %r8b, %esi
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $5, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movb %r15b, %bl
+; AVX512VL-NEXT:    shrb $6, %bl
+; AVX512VL-NEXT:    andb $1, %bl
+; AVX512VL-NEXT:    je LBB23_44
+; AVX512VL-NEXT:  ## BB#43:
+; AVX512VL-NEXT:    movb $-1, %bl
+; AVX512VL-NEXT:  LBB23_44:
+; AVX512VL-NEXT:    vpinsrb $9, %ecx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %edx, %edi, %eax
+; AVX512VL-NEXT:    movl $283, %ecx ## imm = 0x11B
+; AVX512VL-NEXT:    vpinsrb $7, %esi, %xmm3, %xmm3
+; AVX512VL-NEXT:    movzbl %r13b, %esi
+; AVX512VL-NEXT:    movzbl %bl, %edx
+; AVX512VL-NEXT:    vpinsrb $6, %edx, %xmm4, %xmm4
+; AVX512VL-NEXT:    movb %r15b, %bl
+; AVX512VL-NEXT:    shrb $7, %bl
+; AVX512VL-NEXT:    je LBB23_46
+; AVX512VL-NEXT:  ## BB#45:
+; AVX512VL-NEXT:    movb $-1, %bl
+; AVX512VL-NEXT:  LBB23_46:
+; AVX512VL-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %ecx, %edi, %ecx
+; AVX512VL-NEXT:    movl $284, %edx ## imm = 0x11C
+; AVX512VL-NEXT:    vpinsrb $8, %esi, %xmm3, %xmm3
+; AVX512VL-NEXT:    movq {{[0-9]+}}(%rsp), %rax ## 8-byte Reload
+; AVX512VL-NEXT:    movzbl %al, %esi
+; AVX512VL-NEXT:    movzbl %bl, %eax
+; AVX512VL-NEXT:    vpinsrb $7, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $8, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_48
+; AVX512VL-NEXT:  ## BB#47:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_48:
+; AVX512VL-NEXT:    vpinsrb $11, %ecx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %edx, %edi, %ecx
+; AVX512VL-NEXT:    movl $285, %edx ## imm = 0x11D
+; AVX512VL-NEXT:    vpinsrb $9, %esi, %xmm3, %xmm3
+; AVX512VL-NEXT:    movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
+; AVX512VL-NEXT:    movzbl %sil, %esi
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $8, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $9, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_50
+; AVX512VL-NEXT:  ## BB#49:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_50:
+; AVX512VL-NEXT:    vpinsrb $12, %ecx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %edx, %edi, %ecx
+; AVX512VL-NEXT:    movl $286, %edx ## imm = 0x11E
+; AVX512VL-NEXT:    vpinsrb $10, %esi, %xmm3, %xmm3
+; AVX512VL-NEXT:    movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
+; AVX512VL-NEXT:    movzbl %sil, %esi
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $9, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $10, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_52
+; AVX512VL-NEXT:  ## BB#51:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_52:
+; AVX512VL-NEXT:    vpinsrb $13, %ecx, %xmm2, %xmm2
+; AVX512VL-NEXT:    bextrl %edx, %edi, %edx
+; AVX512VL-NEXT:    vpinsrb $11, %esi, %xmm3, %xmm3
+; AVX512VL-NEXT:    movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; AVX512VL-NEXT:    movzbl %cl, %ecx
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $10, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $11, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_54
+; AVX512VL-NEXT:  ## BB#53:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_54:
+; AVX512VL-NEXT:    vpinsrb $14, %edx, %xmm2, %xmm2
+; AVX512VL-NEXT:    shrl $31, %edi
+; AVX512VL-NEXT:    vpinsrb $12, %ecx, %xmm3, %xmm3
+; AVX512VL-NEXT:    movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; AVX512VL-NEXT:    movzbl %cl, %ecx
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $11, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $12, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_56
+; AVX512VL-NEXT:  ## BB#55:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_56:
+; AVX512VL-NEXT:    vpinsrb $15, %edi, %xmm2, %xmm2
+; AVX512VL-NEXT:    vpinsrb $13, %ecx, %xmm3, %xmm3
+; AVX512VL-NEXT:    movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; AVX512VL-NEXT:    movzbl %cl, %ecx
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $12, %eax, %xmm4, %xmm4
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $13, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_58
+; AVX512VL-NEXT:  ## BB#57:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_58:
+; AVX512VL-NEXT:    vinserti32x4 $1, %xmm2, %ymm1, %ymm1
+; AVX512VL-NEXT:    vpinsrb $14, %ecx, %xmm3, %xmm2
+; AVX512VL-NEXT:    movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; AVX512VL-NEXT:    movzbl %cl, %ecx
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $13, %eax, %xmm4, %xmm3
+; AVX512VL-NEXT:    movq %r15, %rax
+; AVX512VL-NEXT:    shrq $14, %rax
+; AVX512VL-NEXT:    andb $1, %al
+; AVX512VL-NEXT:    je LBB23_60
+; AVX512VL-NEXT:  ## BB#59:
+; AVX512VL-NEXT:    movb $-1, %al
+; AVX512VL-NEXT:  LBB23_60:
+; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512VL-NEXT:    vpinsrb $15, %ecx, %xmm2, %xmm1
+; AVX512VL-NEXT:    movzbl %al, %eax
+; AVX512VL-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm2
+; AVX512VL-NEXT:    shrq $15, %r15
+; AVX512VL-NEXT:    andb $1, %r15b
+; AVX512VL-NEXT:    je LBB23_62
+; AVX512VL-NEXT:  ## BB#61:
+; AVX512VL-NEXT:    movb $-1, %r15b
+; AVX512VL-NEXT:  LBB23_62:
+; AVX512VL-NEXT:    movzbl %r15b, %eax
+; AVX512VL-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm2
+; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm2, %ymm1
+; AVX512VL-NEXT:    vpsllw $7, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX512VL-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
+; AVX512VL-NEXT:    leaq -40(%rbp), %rsp
+; AVX512VL-NEXT:    popq %rbx
+; AVX512VL-NEXT:    popq %r12
+; AVX512VL-NEXT:    popq %r13
+; AVX512VL-NEXT:    popq %r14
+; AVX512VL-NEXT:    popq %r15
+; AVX512VL-NEXT:    popq %rbp
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: test17:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    kmovq %rdi, %k0
@@ -1349,6 +2372,22 @@
 ; KNL-NEXT:    vpmovqw %zmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: test18:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    kmovw %edi, %k0
+; AVX512VL-NEXT:    kmovw %esi, %k1
+; AVX512VL-NEXT:    kshiftlw $7, %k1, %k2
+; AVX512VL-NEXT:    kshiftrw $15, %k2, %k2
+; AVX512VL-NEXT:    kshiftlw $6, %k1, %k1
+; AVX512VL-NEXT:    kshiftrw $15, %k1, %k1
+; AVX512VL-NEXT:    kshiftlw $6, %k1, %k1
+; AVX512VL-NEXT:    korw %k1, %k0, %k0
+; AVX512VL-NEXT:    kshiftlw $7, %k2, %k1
+; AVX512VL-NEXT:    korw %k1, %k0, %k1
+; AVX512VL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512VL-NEXT:    vpmovqw %zmm0, %xmm0
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: test18:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    kmovb %edi, %k0
@@ -1385,6 +2424,19 @@
 ; KNL-NEXT:    vpand %ymm1, %ymm2, %ymm1
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: test21:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
+; AVX512VL-NEXT:    vpsllw $15, %ymm3, %ymm3
+; AVX512VL-NEXT:    vpsraw $15, %ymm3, %ymm3
+; AVX512VL-NEXT:    vpandq %ymm0, %ymm3, %ymm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm2, %xmm2
+; AVX512VL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
+; AVX512VL-NEXT:    vpsllw $15, %ymm2, %ymm2
+; AVX512VL-NEXT:    vpsraw $15, %ymm2, %ymm2
+; AVX512VL-NEXT:    vpandq %ymm1, %ymm2, %ymm1
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: test21:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
@@ -1412,6 +2464,14 @@
 ; KNL-NEXT:    movb %al, (%rdi)
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: test22:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    movb %al, (%rdi)
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: test22:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
@@ -1433,6 +2493,14 @@
 ; KNL-NEXT:    movb %al, (%rdi)
 ; KNL-NEXT:    retq
 ;
+; AVX512VL-LABEL: test23:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vpsllq $63, %xmm0, %xmm0
+; AVX512VL-NEXT:    vptestmq %xmm0, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    movb %al, (%rdi)
+; AVX512VL-NEXT:    retq
+;
 ; SKX-LABEL: test23:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
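
What the predicate split buys us, in practice: the EVEX forms of VPMOVSXBW/VPMOVZXBW exist only with AVX512BW (plus AVX512VL for the 128/256-bit widths), so the byte-to-word patterns above are kept alive under NoVLX_Or_NoBWI on AVX512VL-only targets, while every other extend is gated on NoVLX alone. A minimal standalone check in the same style as the test above, sketched here for illustration only (this file, RUN line, and function name are not part of the patch), might look like:

; Illustrative sketch, not part of this patch: on a target with AVX512VL but
; without AVX512BW, the VEX-encoded vpmovzxbw from the AVX patterns must
; still be selected for a 128-bit zero-extend.
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s

define <8 x i16> @zext_lo8_to_v8i16(<16 x i8> %a) {
; CHECK-LABEL: zext_lo8_to_v8i16:
; CHECK: vpmovzxbw
  %lo = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %z = zext <8 x i8> %lo to <8 x i16>
  ret <8 x i16> %z
}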