Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -21752,14 +21752,26 @@ } if (VT == MVT::v16i8 || - (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP())) { + (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) || + (VT == MVT::v64i8 && Subtarget.hasBWI())) { MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2); unsigned ShiftOpcode = Op->getOpcode(); auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) { - // On SSE41 targets we make use of the fact that VSELECT lowers - // to PBLENDVB which selects bytes based just on the sign bit. - if (Subtarget.hasSSE41()) { + if (VT.is512BitVector()) { + // On AVX512BW targets we make use of the fact that VSELECT lowers + // to a masked blend which selects bytes based just on the sign bit + // extracted to a mask. + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + V0 = DAG.getBitcast(VT, V0); + V1 = DAG.getBitcast(VT, V1); + Sel = DAG.getBitcast(VT, Sel); + Sel = DAG.getNode(X86ISD::CVT2MASK, dl, MaskVT, Sel); + return DAG.getBitcast(SelVT, + DAG.getNode(ISD::VSELECT, dl, VT, Sel, V0, V1)); + } else if (Subtarget.hasSSE41()) { + // On SSE41 targets we make use of the fact that VSELECT lowers + // to PBLENDVB which selects bytes based just on the sign bit. V0 = DAG.getBitcast(VT, V0); V1 = DAG.getBitcast(VT, V1); Sel = DAG.getBitcast(VT, Sel); Index: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp +++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp @@ -323,6 +323,10 @@ { ISD::SRL, MVT::v32i16, 1 }, // vpsrlvw { ISD::SRA, MVT::v32i16, 1 }, // vpsravw + { ISD::SHL, MVT::v64i8, 11 }, // vpblendvb sequence. + { ISD::SRL, MVT::v64i8, 11 }, // vpblendvb sequence. + { ISD::SRA, MVT::v64i8, 24 }, // vpblendvb sequence. + { ISD::MUL, MVT::v64i8, 11 }, // extend/pmullw/trunc sequence. { ISD::MUL, MVT::v32i8, 4 }, // extend/pmullw/trunc sequence. { ISD::MUL, MVT::v16i8, 4 }, // extend/pmullw/trunc sequence. Index: llvm/trunk/test/Analysis/CostModel/X86/vshift-ashr-cost.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ llvm/trunk/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -165,9 +165,9 @@ ; AVX: Found an estimated cost of 96 for instruction: %shift ; AVX2: Found an estimated cost of 48 for instruction: %shift ; AVX512F: Found an estimated cost of 48 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 24 for instruction: %shift ; AVX512VL: Found an estimated cost of 48 for instruction: %shift -; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift +; AVX512BWVL: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 8 for instruction: %shift %shift = ashr <64 x i8> %a, %b ret <64 x i8> %shift @@ -333,9 +333,9 @@ ; AVX: Found an estimated cost of 96 for instruction: %shift ; AVX2: Found an estimated cost of 48 for instruction: %shift ; AVX512F: Found an estimated cost of 48 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 24 for instruction: %shift ; AVX512VL: Found an estimated cost of 48 for instruction: %shift -; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift +; AVX512BWVL: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 8 for instruction: %shift %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer %shift = ashr <64 x i8> %a, %splat @@ -491,9 +491,9 @@ ; AVX: Found an estimated cost of 96 for instruction: %shift ; AVX2: Found an estimated cost of 48 for instruction: %shift ; AVX512F: Found an estimated cost of 48 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 24 for instruction: %shift ; AVX512VL: Found an estimated cost of 48 for instruction: %shift -; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift +; AVX512BWVL: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 8 for instruction: %shift %shift = ashr <64 x i8> %a, ret <64 x i8> %shift Index: llvm/trunk/test/Analysis/CostModel/X86/vshift-lshr-cost.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ llvm/trunk/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -165,9 +165,9 @@ ; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 11 for instruction: %shift ; AVX512VL: Found an estimated cost of 22 for instruction: %shift -; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift +; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 8 for instruction: %shift %shift = lshr <64 x i8> %a, %b ret <64 x i8> %shift @@ -336,9 +336,9 @@ ; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 11 for instruction: %shift ; AVX512VL: Found an estimated cost of 22 for instruction: %shift -; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift +; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 8 for instruction: %shift %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer %shift = lshr <64 x i8> %a, %splat @@ -497,9 +497,9 @@ ; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 11 for instruction: %shift ; AVX512VL: Found an estimated cost of 22 for instruction: %shift -; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift +; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 8 for instruction: %shift %shift = lshr <64 x i8> %a, ret <64 x i8> %shift Index: llvm/trunk/test/Analysis/CostModel/X86/vshift-shl-cost.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ llvm/trunk/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -164,9 +164,9 @@ ; AVX: Found an estimated cost of 44 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 11 for instruction: %shift ; AVX512VL: Found an estimated cost of 22 for instruction: %shift -; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift +; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = shl <64 x i8> %a, %b ret <64 x i8> %shift @@ -335,9 +335,9 @@ ; AVX: Found an estimated cost of 44 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 11 for instruction: %shift ; AVX512VL: Found an estimated cost of 22 for instruction: %shift -; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift +; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer %shift = shl <64 x i8> %a, %splat @@ -498,9 +498,9 @@ ; AVX: Found an estimated cost of 44 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 11 for instruction: %shift ; AVX512VL: Found an estimated cost of 22 for instruction: %shift -; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift +; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = shl <64 x i8> %a, ret <64 x i8> %shift Index: llvm/trunk/test/CodeGen/X86/vector-shift-ashr-512.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shift-ashr-512.ll +++ llvm/trunk/test/CodeGen/X86/vector-shift-ashr-512.ll @@ -100,399 +100,36 @@ ; ; AVX512BW-LABEL: var_shift_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %ecx -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $3, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $7, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $11, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $15, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2 -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm4, %esi -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %sil -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %sil -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: movzbl %sil, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63] +; AVX512BW-NEXT: vpsraw $4, %zmm2, %zmm3 +; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1 +; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] +; AVX512BW-NEXT: vpmovb2m %zmm4, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1} +; AVX512BW-NEXT: vpsraw $2, %zmm2, %zmm3 +; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4 +; AVX512BW-NEXT: vpmovb2m %zmm4, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1} +; AVX512BW-NEXT: vpsraw $1, %zmm2, %zmm3 +; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4 +; AVX512BW-NEXT: vpmovb2m %zmm4, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1} +; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 +; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55] +; AVX512BW-NEXT: vpsraw $4, %zmm0, %zmm3 +; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1} +; AVX512BW-NEXT: vpsraw $2, %zmm0, %zmm3 +; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1} +; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm3 +; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: retq %shift = ashr <64 x i8> %a, %b ret <64 x i8> %shift @@ -591,399 +228,36 @@ ; AVX512BW-LABEL: splatvar_shift_v64i8: ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1 -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $3, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $7, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $11, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $15, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2 -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpextrb $2, %xmm3, %esi -; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %sil -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $4, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $6, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $8, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $10, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $12, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $14, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi -; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %sil -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: movzbl %sil, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %dl -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: sarb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63] +; AVX512BW-NEXT: vpsraw $4, %zmm2, %zmm3 +; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1 +; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] +; AVX512BW-NEXT: vpmovb2m %zmm4, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1} +; AVX512BW-NEXT: vpsraw $2, %zmm2, %zmm3 +; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4 +; AVX512BW-NEXT: vpmovb2m %zmm4, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1} +; AVX512BW-NEXT: vpsraw $1, %zmm2, %zmm3 +; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4 +; AVX512BW-NEXT: vpmovb2m %zmm4, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1} +; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 +; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55] +; AVX512BW-NEXT: vpsraw $4, %zmm0, %zmm3 +; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1} +; AVX512BW-NEXT: vpsraw $2, %zmm0, %zmm3 +; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1} +; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm3 +; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: retq %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer %shift = ashr <64 x i8> %a, %splat @@ -1081,252 +355,36 @@ ; ; AVX512BW-LABEL: constant_shift_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: sarb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax -; AVX512BW-NEXT: sarb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax -; AVX512BW-NEXT: sarb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: sarb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: sarb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax -; AVX512BW-NEXT: sarb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63] +; AVX512BW-NEXT: vpsraw $4, %zmm1, %zmm2 +; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm3 +; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm3[8],zmm0[9],zmm3[9],zmm0[10],zmm3[10],zmm0[11],zmm3[11],zmm0[12],zmm3[12],zmm0[13],zmm3[13],zmm0[14],zmm3[14],zmm0[15],zmm3[15],zmm0[24],zmm3[24],zmm0[25],zmm3[25],zmm0[26],zmm3[26],zmm0[27],zmm3[27],zmm0[28],zmm3[28],zmm0[29],zmm3[29],zmm0[30],zmm3[30],zmm0[31],zmm3[31],zmm0[40],zmm3[40],zmm0[41],zmm3[41],zmm0[42],zmm3[42],zmm0[43],zmm3[43],zmm0[44],zmm3[44],zmm0[45],zmm3[45],zmm0[46],zmm3[46],zmm0[47],zmm3[47],zmm0[56],zmm3[56],zmm0[57],zmm3[57],zmm0[58],zmm3[58],zmm0[59],zmm3[59],zmm0[60],zmm3[60],zmm0[61],zmm3[61],zmm0[62],zmm3[62],zmm0[63],zmm3[63] +; AVX512BW-NEXT: vpmovb2m %zmm4, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1} +; AVX512BW-NEXT: vpsraw $2, %zmm1, %zmm2 +; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4 +; AVX512BW-NEXT: vpmovb2m %zmm4, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1} +; AVX512BW-NEXT: vpsraw $1, %zmm1, %zmm2 +; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4 +; AVX512BW-NEXT: vpmovb2m %zmm4, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1} +; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 +; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55] +; AVX512BW-NEXT: vpsraw $4, %zmm0, %zmm2 +; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm3[0],zmm0[1],zmm3[1],zmm0[2],zmm3[2],zmm0[3],zmm3[3],zmm0[4],zmm3[4],zmm0[5],zmm3[5],zmm0[6],zmm3[6],zmm0[7],zmm3[7],zmm0[16],zmm3[16],zmm0[17],zmm3[17],zmm0[18],zmm3[18],zmm0[19],zmm3[19],zmm0[20],zmm3[20],zmm0[21],zmm3[21],zmm0[22],zmm3[22],zmm0[23],zmm3[23],zmm0[32],zmm3[32],zmm0[33],zmm3[33],zmm0[34],zmm3[34],zmm0[35],zmm3[35],zmm0[36],zmm3[36],zmm0[37],zmm3[37],zmm0[38],zmm3[38],zmm0[39],zmm3[39],zmm0[48],zmm3[48],zmm0[49],zmm3[49],zmm0[50],zmm3[50],zmm0[51],zmm3[51],zmm0[52],zmm3[52],zmm0[53],zmm3[53],zmm0[54],zmm3[54],zmm0[55],zmm3[55] +; AVX512BW-NEXT: vpmovb2m %zmm3, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpsraw $2, %zmm0, %zmm2 +; AVX512BW-NEXT: vpaddw %zmm3, %zmm3, %zmm3 +; AVX512BW-NEXT: vpmovb2m %zmm3, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpaddw %zmm3, %zmm3, %zmm3 +; AVX512BW-NEXT: vpmovb2m %zmm3, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq %shift = ashr <64 x i8> %a, ret <64 x i8> %shift Index: llvm/trunk/test/CodeGen/X86/vector-shift-lshr-512.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shift-lshr-512.ll +++ llvm/trunk/test/CodeGen/X86/vector-shift-lshr-512.ll @@ -79,399 +79,21 @@ ; ; AVX512BW-LABEL: var_shift_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %ecx -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $3, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $7, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $11, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $15, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2 -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm4, %esi -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %sil -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %sil -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: movzbl %sil, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} ; AVX512BW-NEXT: retq %shift = lshr <64 x i8> %a, %b ret <64 x i8> %shift @@ -553,399 +175,21 @@ ; AVX512BW-LABEL: splatvar_shift_v64i8: ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1 -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $3, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $7, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $11, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $15, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2 -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpextrb $2, %xmm3, %esi -; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %sil -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $4, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $6, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $8, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $10, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $12, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $14, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi -; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %sil -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: movzbl %sil, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %dl -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shrb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} ; AVX512BW-NEXT: retq %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer %shift = lshr <64 x i8> %a, %splat @@ -1026,252 +270,21 @@ ; ; AVX512BW-LABEL: constant_shift_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: shrb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax -; AVX512BW-NEXT: shrb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax -; AVX512BW-NEXT: shrb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: shrb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: shrb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} ; AVX512BW-NEXT: retq %shift = lshr <64 x i8> %a, ret <64 x i8> %shift Index: llvm/trunk/test/CodeGen/X86/vector-shift-shl-512.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shift-shl-512.ll +++ llvm/trunk/test/CodeGen/X86/vector-shift-shl-512.ll @@ -76,399 +76,19 @@ ; ; AVX512BW-LABEL: var_shift_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %ecx -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $3, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $7, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $11, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $15, %xmm2, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2 -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm4, %esi -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %sil -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %sil -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: movzbl %sil, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1} ; AVX512BW-NEXT: retq %shift = shl <64 x i8> %a, %b ret <64 x i8> %shift @@ -547,399 +167,19 @@ ; AVX512BW-LABEL: splatvar_shift_v64i8: ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1 -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx -; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $3, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $7, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $11, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $15, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax -; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm4 -; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2 -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpextrb $2, %xmm3, %esi -; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %sil -; AVX512BW-NEXT: movzbl %dl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm5 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $4, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $6, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $8, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $10, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $12, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax -; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $14, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx -; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi -; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %sil -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: movzbl %sil, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm4 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx -; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %dl -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx -; AVX512BW-NEXT: # kill: %CL %CL %ECX -; AVX512BW-NEXT: shlb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1} ; AVX512BW-NEXT: retq %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer %shift = shl <64 x i8> %a, %splat @@ -1013,252 +253,19 @@ ; ; AVX512BW-LABEL: constant_shift_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: shlb $7, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax -; AVX512BW-NEXT: shlb $6, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax -; AVX512BW-NEXT: shlb $5, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: shlb $4, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: shlb $3, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax -; AVX512BW-NEXT: shlb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax -; AVX512BW-NEXT: addb %al, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1} ; AVX512BW-NEXT: retq %shift = shl <64 x i8> %a, ret <64 x i8> %shift