Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -1438,6 +1438,8 @@
     setOperationAction(ISD::MUL, MVT::v64i8, Custom);
     setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
     setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
+    setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
+    setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
     setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
     setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
     setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
@@ -21604,7 +21606,8 @@
     return Lower256IntArith(Op, DAG);
 
   // Only i8 vectors should need custom lowering after this.
-  assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256())) &&
+  assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
+          (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
          "Unsupported vector type");
 
   // Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply,
@@ -21618,6 +21621,11 @@
   unsigned ExShift = (ISD::MULHU == Opcode ? ISD::SRL : ISD::SRA);
   unsigned ExAVX = (ISD::MULHU == Opcode ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
 
+  // For 512-bit vectors, split into 256-bit vectors to allow the
+  // sign-extension to occur.
+  if (VT == MVT::v64i8)
+    return Lower512IntArith(Op, DAG);
+
   // AVX2 implementations - extend xmm subvectors to ymm.
   if (Subtarget.hasInt256()) {
     unsigned NumElems = VT.getVectorNumElements();
Index: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll
@@ -172,716 +172,26 @@
 ;
 ; AVX512BW-LABEL: test_div7_64i8:
 ; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX512BW-NEXT: movsbl %cl, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %edx
-; AVX512BW-NEXT: shrl $8, %edx
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: movl %ecx, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %cl
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: movzbl %cl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm2
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax
-; AVX512BW-NEXT: movsbl %al, %eax
-; AVX512BW-NEXT: imull $-109, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrb $7, %cl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax
-;
AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $13, %xmm1, 
%eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx -; AVX512BW-NEXT: movsbl %cl, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %edx -; AVX512BW-NEXT: shrl $8, %edx -; AVX512BW-NEXT: addb %dl, %cl -; AVX512BW-NEXT: movl %ecx, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %cl -; AVX512BW-NEXT: addb %dl, %cl -; AVX512BW-NEXT: movzbl %cl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm3 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, 
%xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, 
%eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx -; AVX512BW-NEXT: movsbl %cl, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %edx -; AVX512BW-NEXT: shrl $8, %edx -; AVX512BW-NEXT: addb %dl, %cl -; AVX512BW-NEXT: movl %ecx, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %cl -; AVX512BW-NEXT: addb %dl, %cl -; AVX512BW-NEXT: movzbl %cl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm3 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; 
AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: 
sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm0, %ecx -; AVX512BW-NEXT: movsbl %cl, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %edx -; AVX512BW-NEXT: shrl $8, %edx -; AVX512BW-NEXT: addb %dl, %cl -; AVX512BW-NEXT: movl %ecx, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %cl -; AVX512BW-NEXT: addb %dl, %cl -; AVX512BW-NEXT: movzbl %cl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm3 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al 
-; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %eax -; AVX512BW-NEXT: imull $-109, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1 +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427] +; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 +; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm3 +; AVX512BW-NEXT: vpmovsxbw %ymm3, %zmm3 +; AVX512BW-NEXT: vpmullw %zmm2, %zmm3, %zmm2 +; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 +; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = 
[32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32] +; AVX512BW-NEXT: vpxorq %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq %res = sdiv <64 x i8> %a, ret <64 x i8> %res @@ -1121,909 +431,36 @@ ; ; AVX512BW-LABEL: test_rem7_64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %edx -; AVX512BW-NEXT: imull $-109, %edx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movb $7, %dil -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %esi -; AVX512BW-NEXT: imull $-109, %esi, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %sil -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm2 -; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, 
%eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $13, 
%xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %esi -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %edx -; AVX512BW-NEXT: imull $-109, %edx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpinsrb $1, %esi, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; 
AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb 
%cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %esi -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %edx -; AVX512BW-NEXT: imull $-109, %edx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpinsrb $1, %esi, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: 
addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: 
sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX 
-; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %esi -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %edx -; AVX512BW-NEXT: imull $-109, %edx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrb $7, %cl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpinsrb $1, %esi, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; 
AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax -; AVX512BW-NEXT: movsbl %al, %ecx -; AVX512BW-NEXT: imull $-109, %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: shrb $7, %dl -; AVX512BW-NEXT: sarb $2, %al -; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: # kill: %AL %AL %EAX -; AVX512BW-NEXT: mulb %dil -; AVX512BW-NEXT: subb %al, %cl -; AVX512BW-NEXT: movzbl %cl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm0, 
%eax
-; AVX512BW-NEXT: movsbl %al, %ecx
-; AVX512BW-NEXT: imull $-109, %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: shrb $7, %dl
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: addb %dl, %al
-; AVX512BW-NEXT: # kill: %AL %AL %EAX
-; AVX512BW-NEXT: mulb %dil
-; AVX512BW-NEXT: subb %al, %cl
-; AVX512BW-NEXT: movzbl %cl, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0
-; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427]
+; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512BW-NEXT: vpmovsxbw %ymm3, %zmm3
+; AVX512BW-NEXT: vpmullw %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm2
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
+; AVX512BW-NEXT: vpxorq %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsubb %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm2
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT: vpmullw %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1
+; AVX512BW-NEXT: vpmullw %zmm3, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%res = srem <64 x i8> %a,
ret <64 x i8> %res
Index: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
@@ -178,588 +178,23 @@
;
; AVX512BW-LABEL: test_div7_64i8:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: subb %cl, %al
-; AVX512BW-NEXT: shrb %al
-; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: shrb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX512BW-NEXT: imull $37, %ecx, %edx
-; AVX512BW-NEXT: shrl $8, %edx
-; AVX512BW-NEXT: subb %dl, %cl
-; AVX512BW-NEXT: shrb %cl
-; AVX512BW-NEXT: addb %dl, %cl
-; AVX512BW-NEXT: shrb $2, %cl
-; AVX512BW-NEXT: movzbl %cl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm2
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax
-; AVX512BW-NEXT: imull $37, %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: 
subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; 
AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx -; AVX512BW-NEXT: imull $37, %ecx, %edx -; AVX512BW-NEXT: shrl $8, %edx -; AVX512BW-NEXT: subb %dl, %cl -; AVX512BW-NEXT: shrb %cl -; AVX512BW-NEXT: addb %dl, %cl -; AVX512BW-NEXT: shrb $2, %cl -; AVX512BW-NEXT: movzbl %cl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm3 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, 
%al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx -; AVX512BW-NEXT: imull $37, %ecx, %edx -; AVX512BW-NEXT: shrl $8, %edx -; AVX512BW-NEXT: subb %dl, %cl -; AVX512BW-NEXT: shrb %cl -; AVX512BW-NEXT: addb %dl, %cl -; AVX512BW-NEXT: shrb $2, %cl -; AVX512BW-NEXT: movzbl %cl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm3 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax -; AVX512BW-NEXT: imull $37, 
%eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; 
AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpextrb $0, %xmm0, %ecx -; AVX512BW-NEXT: imull $37, %ecx, %edx -; AVX512BW-NEXT: shrl $8, %edx -; AVX512BW-NEXT: subb %dl, %cl -; AVX512BW-NEXT: shrb %cl -; AVX512BW-NEXT: addb %dl, %cl -; AVX512BW-NEXT: shrb $2, %cl -; AVX512BW-NEXT: movzbl %cl, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm3 -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; 
AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax -; AVX512BW-NEXT: imull $37, %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movzbl %al, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] +; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 +; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm3 +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero +; AVX512BW-NEXT: vpmullw 
%zmm2, %zmm3, %zmm2 +; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 +; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 ; AVX512BW-NEXT: retq %res = udiv <64 x i8> %a, ret <64 x i8> %res @@ -1005,781 +440,33 @@ ; ; AVX512BW-LABEL: test_rem7_64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; AVX512BW-NEXT: vpextrb $1, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %cl, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: movb $7, %cl -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm1, %esi -; AVX512BW-NEXT: imull $37, %esi, %edi -; AVX512BW-NEXT: shrl $8, %edi -; AVX512BW-NEXT: movl %esi, %eax -; AVX512BW-NEXT: subb %dil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %dil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %sil -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm2 -; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $2, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $3, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $4, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $5, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $6, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 
-; AVX512BW-NEXT: vpextrb $7, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $8, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $9, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $10, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $11, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $12, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $13, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $14, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512BW-NEXT: vpextrb $15, %xmm1, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; 
AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 -; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm2, %esi -; AVX512BW-NEXT: imull $37, %esi, %edi -; AVX512BW-NEXT: shrl $8, %edi -; AVX512BW-NEXT: movl %esi, %eax -; AVX512BW-NEXT: subb %dil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %dil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %sil -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, 
%xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl 
$8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm2, %esi -; AVX512BW-NEXT: imull $37, %esi, %edi -; AVX512BW-NEXT: shrl $8, %edi -; AVX512BW-NEXT: movl %esi, %eax -; AVX512BW-NEXT: subb %dil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %dil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %sil -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: 
movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 -; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %edx -; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi -; AVX512BW-NEXT: imull $37, %esi, %edi -; AVX512BW-NEXT: shrl $8, %edi -; AVX512BW-NEXT: movl %esi, %eax -; AVX512BW-NEXT: subb %dil, %al -; AVX512BW-NEXT: shrb 
%al -; AVX512BW-NEXT: addb %dil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %sil -; AVX512BW-NEXT: movzbl %sil, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm3 -; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $3, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $4, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $7, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $9, 
%eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $11, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $12, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512BW-NEXT: vpextrb $15, %xmm0, %edx -; AVX512BW-NEXT: imull $37, %edx, %esi -; AVX512BW-NEXT: shrl $8, %esi -; AVX512BW-NEXT: movl %edx, %eax -; AVX512BW-NEXT: subb %sil, %al -; AVX512BW-NEXT: shrb %al -; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: shrb $2, %al -; AVX512BW-NEXT: mulb %cl -; AVX512BW-NEXT: subb %al, %dl -; AVX512BW-NEXT: movzbl %dl, %eax -; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] +; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 +; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm3 +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm3 = 
ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
+; AVX512BW-NEXT: vpmullw %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpsrlw $1, %zmm2, %zmm2
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm2
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT: vpmullw %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1
+; AVX512BW-NEXT: vpmullw %zmm3, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%res = urem <64 x i8> %a,
ret <64 x i8> %res
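
The new AVX512BW sequences above are the vector form of the fixed-point reciprocal scheme that the removed per-element code spelled out with scalar imull/shrl: widen each byte lane to 16 bits (vpmovzxbw or vpmovsxbw), multiply by the magic constant (37 for the unsigned tests; -109, encoded as 65427, for the signed ones), keep the high byte (vpsrlw $8 plus vpmovwb), apply the sub/shift/add fix-up, and for the rem tests multiply the quotient back by 7 and subtract it. A minimal scalar sketch of the unsigned arithmetic, for illustration only (udiv7/urem7 are not names used by this patch):

  // One lane of the unsigned test_div7_64i8 / test_rem7_64i8, mirroring the
  // vector code: hi = mulhi8(x, 37), q = (((x - hi) >> 1) + hi) >> 2, r = x - 7*q.
  static inline unsigned char udiv7(unsigned char x) {
    unsigned hi = ((unsigned)x * 37u) >> 8;       // vpmullw + vpsrlw $8 + vpmovwb
    unsigned q = (((unsigned)x - hi) >> 1) + hi;  // vpsubb, vpsrlw $1 (+ byte mask), vpaddb
    return (unsigned char)(q >> 2);               // vpsrlw $2 (+ byte mask)
  }
  static inline unsigned char urem7(unsigned char x) {
    return (unsigned char)(x - 7u * udiv7(x));    // vpmullw by the splat of 7, then vpsubb
  }

The signed tests in vector-idiv-sdiv-512.ll follow the same shape, with a sign-extending widen (vpmovsxbw) and an extra sign fix-up: the vpxorq/vpsubb against the splat of 32 re-sign-extends the shifted quotient, and the vpsrlw $7 plus vpaddb adds the rounding bit for negative inputs.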