Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -21535,14 +21535,23 @@ } if (VT == MVT::v16i8 || - (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP())) { + (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) || + (VT == MVT::v64i8 && Subtarget.hasBWI())) { MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2); unsigned ShiftOpcode = Op->getOpcode(); auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) { - // On SSE41 targets we make use of the fact that VSELECT lowers - // to PBLENDVB which selects bytes based just on the sign bit. - if (Subtarget.hasSSE41()) { + if (Subtarget.hasBWI() && VT.is512BitVector()) { + // On AVX512BW targets we test for the sign bit by comparing to + // zero - a negative value sets that lane's bit in the vXi1 mask + // and VSELECT uses that mask to perform a masked blend. + MVT MaskVT = MVT::getVectorVT(MVT::i1, SelVT.getVectorNumElements()); + SDValue Z = getZeroVector(SelVT, Subtarget, DAG, dl); + SDValue C = DAG.getNode(X86ISD::PCMPGTM, dl, MaskVT, Z, Sel); + return DAG.getNode(ISD::VSELECT, dl, SelVT, C, V0, V1); + } else if (Subtarget.hasSSE41()) { + // On SSE41 targets we make use of the fact that VSELECT lowers + // to PBLENDVB which selects bytes based just on the sign bit. 
V0 = DAG.getBitcast(VT, V0); V1 = DAG.getBitcast(VT, V1); Sel = DAG.getBitcast(VT, Sel); Index: test/CodeGen/X86/vector-shift-ashr-512.ll =================================================================== --- test/CodeGen/X86/vector-shift-ashr-512.ll +++ test/CodeGen/X86/vector-shift-ashr-512.ll @@ -110,399 +110,31 @@ ; ; AVX512BW-LABEL: var_shift_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %ecx -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $3, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax -; AVX512BW-NEXT: 
# kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $7, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $11, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: 
vpextrb $15, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; 
AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, 
%xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2 -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm4, %esi -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %sil -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; 
AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm3, 
%ecx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %sil -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: movzbl %sil, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax 
-; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al 
-; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63] +; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1 +; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] +; AVX512BW-NEXT: vpxord %zmm4, %zmm4, %zmm4 +; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $4, %zmm2, %zmm2 {%k1} +; AVX512BW-NEXT: vpaddw %zmm3, %zmm3, %zmm3 +; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $2, %zmm2, %zmm2 {%k1} +; AVX512BW-NEXT: vpaddw %zmm3, %zmm3, %zmm3 +; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $1, %zmm2, %zmm2 {%k1} +; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 +; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55] +; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = 
zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] +; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $4, %zmm0, %zmm0 {%k1} +; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $2, %zmm0, %zmm0 {%k1} +; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: retq %shift = ashr <64 x i8> %a, %b ret <64 x i8> %shift @@ -601,399 +233,31 @@ ; AVX512BW-LABEL: splatvar_shift_v64i8: ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1 -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx -; AVX512BW-NEXT: 
vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: 
sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $3, %xmm3, %eax -; 
AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $7, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $11, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; 
AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $15, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2 -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpextrb $2, %xmm3, %esi -; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %sil -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx -; AVX512BW-NEXT: # 
kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $4, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $6, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $8, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $10, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $10, 
%eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $12, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $14, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi -; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %sil -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: movzbl %sil, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, 
%eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: 
vpextrb $12, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63] +; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1 +; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] +; AVX512BW-NEXT: vpxord %zmm4, %zmm4, %zmm4 +; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm4, 
%k1 +; AVX512BW-NEXT: vpsraw $4, %zmm2, %zmm2 {%k1} +; AVX512BW-NEXT: vpaddw %zmm3, %zmm3, %zmm3 +; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $2, %zmm2, %zmm2 {%k1} +; AVX512BW-NEXT: vpaddw %zmm3, %zmm3, %zmm3 +; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $1, %zmm2, %zmm2 {%k1} +; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 +; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55] +; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] +; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $4, %zmm0, %zmm0 {%k1} +; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $2, %zmm0, %zmm0 {%k1} +; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: retq %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer %shift = ashr <64 x i8> %a, %splat @@ -1102,252 +366,31 @@ ; ; AVX512BW-LABEL: constant_shift_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax -; AVX512BW-NEXT: vmovd 
%eax, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl 
%al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, 
%xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, 
%xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax -; AVX512BW-NEXT: sarb $5, %al -; 
AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63] +; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm2 +; 
AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm0[8],zmm2[8],zmm0[9],zmm2[9],zmm0[10],zmm2[10],zmm0[11],zmm2[11],zmm0[12],zmm2[12],zmm0[13],zmm2[13],zmm0[14],zmm2[14],zmm0[15],zmm2[15],zmm0[24],zmm2[24],zmm0[25],zmm2[25],zmm0[26],zmm2[26],zmm0[27],zmm2[27],zmm0[28],zmm2[28],zmm0[29],zmm2[29],zmm0[30],zmm2[30],zmm0[31],zmm2[31],zmm0[40],zmm2[40],zmm0[41],zmm2[41],zmm0[42],zmm2[42],zmm0[43],zmm2[43],zmm0[44],zmm2[44],zmm0[45],zmm2[45],zmm0[46],zmm2[46],zmm0[47],zmm2[47],zmm0[56],zmm2[56],zmm0[57],zmm2[57],zmm0[58],zmm2[58],zmm0[59],zmm2[59],zmm0[60],zmm2[60],zmm0[61],zmm2[61],zmm0[62],zmm2[62],zmm0[63],zmm2[63] +; AVX512BW-NEXT: vpxord %zmm4, %zmm4, %zmm4 +; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $4, %zmm1, %zmm1 {%k1} +; AVX512BW-NEXT: vpaddw %zmm3, %zmm3, %zmm3 +; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $2, %zmm1, %zmm1 {%k1} +; AVX512BW-NEXT: vpaddw %zmm3, %zmm3, %zmm3 +; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $1, %zmm1, %zmm1 {%k1} +; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 +; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55] +; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm2 = zmm0[0],zmm2[0],zmm0[1],zmm2[1],zmm0[2],zmm2[2],zmm0[3],zmm2[3],zmm0[4],zmm2[4],zmm0[5],zmm2[5],zmm0[6],zmm2[6],zmm0[7],zmm2[7],zmm0[16],zmm2[16],zmm0[17],zmm2[17],zmm0[18],zmm2[18],zmm0[19],zmm2[19],zmm0[20],zmm2[20],zmm0[21],zmm2[21],zmm0[22],zmm2[22],zmm0[23],zmm2[23],zmm0[32],zmm2[32],zmm0[33],zmm2[33],zmm0[34],zmm2[34],zmm0[35],zmm2[35],zmm0[36],zmm2[36],zmm0[37],zmm2[37],zmm0[38],zmm2[38],zmm0[39],zmm2[39],zmm0[48],zmm2[48],zmm0[49],zmm2[49],zmm0[50],zmm2[50],zmm0[51],zmm2[51],zmm0[52],zmm2[52],zmm0[53],zmm2[53],zmm0[54],zmm2[54],zmm0[55],zmm2[55] +; AVX512BW-NEXT: vpcmpgtw %zmm2, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $4, %zmm0, %zmm0 {%k1} +; 
AVX512BW-NEXT: vpaddw %zmm2, %zmm2, %zmm2 +; AVX512BW-NEXT: vpcmpgtw %zmm2, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $2, %zmm0, %zmm0 {%k1} +; AVX512BW-NEXT: vpaddw %zmm2, %zmm2, %zmm2 +; AVX512BW-NEXT: vpcmpgtw %zmm2, %zmm4, %k1 +; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq %shift = ashr <64 x i8> %a, ret <64 x i8> %shift Index: test/CodeGen/X86/vector-shift-lshr-512.ll =================================================================== --- test/CodeGen/X86/vector-shift-lshr-512.ll +++ test/CodeGen/X86/vector-shift-lshr-512.ll @@ -90,399 +90,22 @@ ; ; AVX512BW-LABEL: var_shift_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %ecx -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $3, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, 
%xmm4 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $7, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $11, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb 
$13, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $15, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %edx -; AVX512BW-NEXT: # kill: 
%CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $12, 
%eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2 -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm4, %esi -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %sil -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $5, 
%xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb 
%cl, %al -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %sil -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: movzbl %sil, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx -; 
AVX512BW-NEXT: vpextrb $6, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL 
%ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm2 +; AVX512BW-NEXT: vpxord %zmm3, %zmm3, %zmm3 +; AVX512BW-NEXT: vpcmpgtb %zmm2, %zmm3, %k1 +; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm3, %k2 +; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k2} +; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2 +; AVX512BW-NEXT: vpcmpgtb %zmm2, %zmm3, %k1 +; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1} ; AVX512BW-NEXT: retq %shift = lshr <64 x i8> %a, %b ret <64 x i8> %shift @@ -564,399 +187,22 @@ ; AVX512BW-LABEL: splatvar_shift_v64i8: ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1 -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; 
AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $10, %xmm2, 
%eax -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: 
vpextrb $2, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $3, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $7, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx -; AVX512BW-NEXT: # 
kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $11, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $15, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2 -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpextrb $2, %xmm3, %esi -; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %sil 
-; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $4, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $6, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $8, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $10, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; 
AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $12, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $14, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi -; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %sil -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: movzbl %sil, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax 
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $11, 
%xmm0, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm2 +; AVX512BW-NEXT: vpxord %zmm3, %zmm3, %zmm3 +; AVX512BW-NEXT: vpcmpgtb %zmm2, %zmm3, %k1 +; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm3, %k2 +; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k2} +; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2 +; AVX512BW-NEXT: vpcmpgtb %zmm2, %zmm3, %k1 +; 
AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1} ; AVX512BW-NEXT: retq %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer %shift = lshr <64 x i8> %a, %splat @@ -1048,252 +294,22 @@ ; ; AVX512BW-LABEL: constant_shift_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb 
$11, %xmm1, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: shrb $7, %al -; 
AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: shrb $5, %al 
-; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; 
AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0 -; AVX512BW-NEXT: 
vinserti128 $1, %xmm2, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm1 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm2 +; AVX512BW-NEXT: vpxord %zmm3, %zmm3, %zmm3 +; AVX512BW-NEXT: vpcmpgtb %zmm2, %zmm3, %k1 +; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm3, %k2 +; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k2} +; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2 +; AVX512BW-NEXT: vpcmpgtb %zmm2, %zmm3, %k1 +; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1} ; AVX512BW-NEXT: retq %shift = lshr <64 x i8> %a, ret <64 x i8> %shift Index: test/CodeGen/X86/vector-shift-shl-512.ll =================================================================== --- test/CodeGen/X86/vector-shift-shl-512.ll +++ test/CodeGen/X86/vector-shift-shl-512.ll @@ -87,399 +87,20 @@ ; ; AVX512BW-LABEL: var_shift_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %ecx -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $3, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $3, 
%xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $7, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $11, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl 
-; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $15, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: 
vpextrb $3, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: 
shlb %cl, %al -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2 -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm4, %esi -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %sil -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX 
-; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 
-; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %sil -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: movzbl %sil, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 
-; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm0, 
%eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm2 +; AVX512BW-NEXT: vpxord %zmm3, %zmm3, %zmm3 +; AVX512BW-NEXT: vpcmpgtb %zmm2, %zmm3, %k1 +; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm3, %k2 +; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k2} +; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1} +; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm1 +; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm3, %k1 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1} ; AVX512BW-NEXT: retq %shift = shl <64 x i8> %a, %b ret <64 x i8> %shift @@ -558,399 +179,20 @@ ; AVX512BW-LABEL: splatvar_shift_v64i8: ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1 -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm3 -; 
AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $7, %eax, 
%xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL 
%CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $3, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $7, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: 
vpextrb $9, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $11, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $15, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: 
vinserti128 $1, %xmm2, %ymm5, %ymm2 -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpextrb $2, %xmm3, %esi -; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %sil -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $4, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $6, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $8, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb 
$8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $10, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $12, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $14, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi -; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %sil -; AVX512BW-NEXT: 
vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: movzbl %sil, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, 
%eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm2 +; AVX512BW-NEXT: vpxord %zmm3, %zmm3, %zmm3 +; AVX512BW-NEXT: vpcmpgtb %zmm2, %zmm3, %k1 +; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm3, %k2 +; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: 
vpblendmb %zmm1, %zmm0, %zmm0 {%k2} +; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1} +; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm1 +; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm3, %k1 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1} ; AVX512BW-NEXT: retq %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer %shift = shl <64 x i8> %a, %splat @@ -1024,252 +266,20 @@ ; ; AVX512BW-LABEL: constant_shift_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax -; AVX512BW-NEXT: shlb 
$6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, 
%eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: shlb $3, %al -; 
AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; 
AVX512BW-NEXT: vpextrb $1, %xmm0, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; 
AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm1 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm2 +; AVX512BW-NEXT: vpxord %zmm3, %zmm3, %zmm3 +; AVX512BW-NEXT: vpcmpgtb %zmm2, %zmm3, %k1 +; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm3, %k2 +; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k2} +; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1} +; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm1 +; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm3, %k1 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1} ; AVX512BW-NEXT: retq %shift = shl <64 x i8> %a, ret <64 x i8> %shift