diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5916,11 +5916,29 @@ // Widen the vector if needed. Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); - // Clear the upper bits of the subvector and move it to its insert position. unsigned ShiftLeft = NumElems - SubVecNumElems; + unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; + + // Do an optimization for the most frequently used types. + if (WideOpVT != MVT::v64i1 || Subtarget.is64Bit()) { + APInt Mask0 = APInt::getBitsSet(NumElems, IdxVal, IdxVal + SubVecNumElems); + Mask0.flipAllBits(); + SDValue CMask0 = DAG.getConstant(Mask0, dl, MVT::getIntegerVT(NumElems)); + SDValue VMask0 = DAG.getNode(ISD::BITCAST, dl, WideOpVT, CMask0); + Vec = DAG.getNode(ISD::AND, dl, WideOpVT, Vec, VMask0); + SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, + DAG.getTargetConstant(ShiftLeft, dl, MVT::i8)); + SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec, + DAG.getTargetConstant(ShiftRight, dl, MVT::i8)); + Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec); + + // Reduce to original width if needed. + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); + } + + // Clear the upper bits of the subvector and move it to its insert position. 
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, DAG.getTargetConstant(ShiftLeft, dl, MVT::i8)); - unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec, DAG.getTargetConstant(ShiftRight, dl, MVT::i8)); diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll --- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll +++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll @@ -531,246 +531,228 @@ ; KNL-NEXT: pushq %r12 ; KNL-NEXT: pushq %rbx ; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: movw $-3, %di +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kshiftlw $2, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $14, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: kshiftlw $3, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: movw $-5, %di +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $2, %k1, %k1 -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $13, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 -; KNL-NEXT: kshiftlw $4, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: movw $-9, %di +; KNL-NEXT: kmovw 
%edi, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $3, %k1, %k1 -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $12, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: kshiftlw $5, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: movw $-17, %di +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $4, %k1, %k1 -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $11, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 -; KNL-NEXT: kshiftlw $6, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: movw $-33, %di +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $5, %k1, %k1 -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $10, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 -; KNL-NEXT: kshiftlw $7, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: movw $-65, %di +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $6, %k1, %k1 -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $9, 
%k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $9, %k0, %k0 -; KNL-NEXT: kshiftrw $9, %k0, %k0 +; KNL-NEXT: movw $-129, %di +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $7, %k1, %k1 -; KNL-NEXT: kshiftlw $8, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $8, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $8, %k0, %k0 -; KNL-NEXT: kshiftrw $8, %k0, %k0 +; KNL-NEXT: movw $-257, %di ## imm = 0xFEFF +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $8, %k1, %k1 -; KNL-NEXT: kshiftlw $9, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $7, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $7, %k0, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k0 +; KNL-NEXT: movw $-513, %di ## imm = 0xFDFF +; KNL-NEXT: kmovw %edi, %k5 +; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $9, %k1, %k1 -; KNL-NEXT: kshiftlw $10, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $6, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $6, %k0, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k0 +; KNL-NEXT: movw $-1025, %di ## imm = 0xFBFF +; KNL-NEXT: kmovw %edi, %k4 +; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $10, %k1, %k1 -; KNL-NEXT: 
kshiftlw $11, %k0, %k6 -; KNL-NEXT: korw %k1, %k6, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $5, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $5, %k0, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k0 +; KNL-NEXT: movw $-2049, %di ## imm = 0xF7FF +; KNL-NEXT: kmovw %edi, %k3 +; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $11, %k1, %k1 -; KNL-NEXT: kshiftlw $12, %k0, %k5 -; KNL-NEXT: korw %k1, %k5, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $4, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $4, %k0, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k0 +; KNL-NEXT: movw $-4097, %di ## imm = 0xEFFF +; KNL-NEXT: kmovw %edi, %k2 +; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $12, %k1, %k1 -; KNL-NEXT: kshiftlw $13, %k0, %k4 -; KNL-NEXT: korw %k1, %k4, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $3, %k0, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k2 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $3, %k1, %k1 +; KNL-NEXT: korw %k1, %k0, %k1 +; KNL-NEXT: movw $-8193, %di ## imm = 0xDFFF +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: kandw %k0, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $2, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k6 +; KNL-NEXT: movw $-16385, %di ## imm = 0xBFFF ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kshiftlw $13, %k1, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k3 -; KNL-NEXT: korw %k0, %k3, %k0 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftlw $2, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k2 +; KNL-NEXT: kandw %k1, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k1 -; KNL-NEXT: korw %k0, %k1, %k0 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 
-; KNL-NEXT: kshiftrw $1, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kshiftlw $1, %k6, %k6 +; KNL-NEXT: kshiftrw $1, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kmovw %edx, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kmovw %esi, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kmovw %ecx, %k2 -; KNL-NEXT: kshiftlw $2, %k2, %k2 +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kmovw %esi, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k7, %k2 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kmovw %r8d, %k2 -; KNL-NEXT: kshiftlw $3, %k2, %k2 +; KNL-NEXT: kandw %k7, %k6, %k6 +; KNL-NEXT: kmovw %edx, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k7, %k2 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kmovw %r9d, %k2 -; KNL-NEXT: kshiftlw $4, %k2, %k2 +; KNL-NEXT: kandw %k7, %k6, %k6 +; KNL-NEXT: kmovw %ecx, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $13, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k7, %k2 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: 
korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k2 -; KNL-NEXT: kshiftlw $5, %k2, %k2 +; KNL-NEXT: kandw %k7, %k6, %k6 +; KNL-NEXT: kmovw %r8d, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $12, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k7, %k2 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k2 -; KNL-NEXT: kshiftlw $6, %k2, %k2 +; KNL-NEXT: kandw %k7, %k6, %k6 +; KNL-NEXT: kmovw %r9d, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $11, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k7, %k2 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 +; KNL-NEXT: kandw %k7, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k2 -; KNL-NEXT: kshiftlw $7, %k2, %k2 +; KNL-NEXT: kmovw %ecx, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $10, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k7, %k2 -; KNL-NEXT: kshiftlw $9, %k0, %k0 -; KNL-NEXT: kshiftrw $9, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 +; KNL-NEXT: kandw %k7, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k2 -; KNL-NEXT: kshiftlw $8, %k2, %k2 +; KNL-NEXT: kmovw %ecx, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $9, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k7, %k2 -; KNL-NEXT: kshiftlw $8, %k0, %k0 -; KNL-NEXT: kshiftrw $8, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 +; KNL-NEXT: kandw %k7, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k2 -; 
KNL-NEXT: kshiftlw $9, %k2, %k2 +; KNL-NEXT: kmovw %ecx, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $8, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k7, %k2 -; KNL-NEXT: kshiftlw $7, %k0, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 +; KNL-NEXT: kandw %k7, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k2 -; KNL-NEXT: kshiftlw $10, %k2, %k2 -; KNL-NEXT: korw %k2, %k6, %k2 -; KNL-NEXT: kshiftlw $6, %k0, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 +; KNL-NEXT: kmovw %ecx, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $7, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kandw %k5, %k6, %k5 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl +; KNL-NEXT: kmovw %ecx, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $6, %k6, %k6 +; KNL-NEXT: korw %k6, %k5, %k5 +; KNL-NEXT: kandw %k4, %k5, %k4 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl +; KNL-NEXT: kmovw %ecx, %k5 +; KNL-NEXT: kshiftlw $15, %k5, %k5 +; KNL-NEXT: kshiftrw $5, %k5, %k5 +; KNL-NEXT: korw %k5, %k4, %k4 +; KNL-NEXT: kandw %k3, %k4, %k3 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl +; KNL-NEXT: kmovw %ecx, %k4 +; KNL-NEXT: kshiftlw $15, %k4, %k4 +; KNL-NEXT: kshiftrw $4, %k4, %k4 +; KNL-NEXT: korw %k4, %k3, %k3 +; KNL-NEXT: kandw %k2, %k3, %k2 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl +; KNL-NEXT: kmovw %ecx, %k3 +; KNL-NEXT: kshiftlw $15, %k3, %k3 +; KNL-NEXT: kshiftrw $3, %k3, %k3 +; KNL-NEXT: korw %k3, %k2, %k2 +; KNL-NEXT: kandw %k0, %k2, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 -; KNL-NEXT: kshiftlw $11, %k2, %k2 -; KNL-NEXT: korw %k2, %k5, %k2 +; KNL-NEXT: kshiftlw $15, %k2, %k2 +; KNL-NEXT: kshiftrw $2, %k2, %k2 +; KNL-NEXT: korw %k2, %k0, %k0 ; KNL-NEXT: xorl %ecx, %ecx ; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp) ; KNL-NEXT: movl $65535, %edx ## imm = 0xFFFF ; KNL-NEXT: movl $0, %esi ; 
KNL-NEXT: cmovnel %edx, %esi -; KNL-NEXT: kshiftlw $5, %k0, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $12, %k2, %k2 -; KNL-NEXT: korw %k2, %k4, %k2 ; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp) -; KNL-NEXT: kshiftlw $4, %k0, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $13, %k2, %k2 -; KNL-NEXT: korw %k2, %k3, %k2 ; KNL-NEXT: cmovnel %edx, %ecx -; KNL-NEXT: kshiftlw $3, %k0, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dl -; KNL-NEXT: kmovw %edx, %k2 -; KNL-NEXT: kshiftlw $14, %k2, %k2 -; KNL-NEXT: korw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $2, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k0 +; KNL-NEXT: kmovw %edx, %k1 +; KNL-NEXT: kshiftlw $14, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 @@ -877,294 +859,225 @@ ; SKX-NEXT: pushq %r13 ; SKX-NEXT: pushq %r12 ; SKX-NEXT: pushq %rbx -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 ; SKX-NEXT: movq %rdi, %rax -; SKX-NEXT: kshiftld $31, %k0, %k0 -; SKX-NEXT: kshiftrd $31, %k0, %k1 -; SKX-NEXT: kshiftld $2, %k0, %k0 -; SKX-NEXT: kord %k0, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $30, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kshiftrd $3, %k1, %k2 -; SKX-NEXT: kshiftld $3, %k2, %k2 -; SKX-NEXT: kshiftld $30, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: movl $-3, %edi +; SKX-NEXT: kmovd %edi, %k2 +; SKX-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kandd %k2, %k0, %k0 +; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $30, %k1, %k1 -; SKX-NEXT: 
kord %k2, %k1, %k1 -; SKX-NEXT: kshiftld $31, %k3, %k2 -; SKX-NEXT: kshiftrd $29, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kshiftrd $4, %k1, %k2 -; SKX-NEXT: kshiftld $4, %k2, %k2 -; SKX-NEXT: kshiftld $29, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: movl $-5, %edi +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $29, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kshiftld $31, %k3, %k2 -; SKX-NEXT: kshiftrd $28, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kshiftrd $5, %k1, %k2 -; SKX-NEXT: kshiftld $5, %k2, %k2 -; SKX-NEXT: kshiftld $28, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: movl $-9, %edi +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $28, %k1, %k1 -; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $27, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kshiftrd $6, %k1, %k2 -; SKX-NEXT: kshiftld $6, %k2, %k2 -; SKX-NEXT: kshiftld $27, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: movl $-17, %edi +; SKX-NEXT: kmovd %edi, %k2 +; SKX-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kandd %k2, %k0, %k0 +; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $27, %k1, %k1 -; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $26, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kshiftrd $7, %k1, %k2 -; SKX-NEXT: kshiftld $7, %k2, %k2 -; SKX-NEXT: kshiftld $26, %k1, %k1 +; SKX-NEXT: kord %k1, 
%k0, %k0 +; SKX-NEXT: movl $-33, %edi +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $26, %k1, %k1 -; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $25, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kshiftrd $8, %k1, %k2 -; SKX-NEXT: kshiftld $8, %k2, %k2 -; SKX-NEXT: kshiftld $25, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: movl $-65, %edi +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $25, %k1, %k1 -; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $24, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kshiftrd $9, %k1, %k2 -; SKX-NEXT: kshiftld $9, %k2, %k2 -; SKX-NEXT: kshiftld $24, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: movl $-129, %edi +; SKX-NEXT: kmovd %edi, %k2 +; SKX-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kandd %k2, %k0, %k0 +; SKX-NEXT: kshiftld $31, %k1, %k1 +; SKX-NEXT: kshiftrd $24, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: movl $-257, %edi ## imm = 0xFEFF +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 +; SKX-NEXT: kshiftrd $23, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: movl $-513, %edi ## imm = 0xFDFF +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kandd %k1, 
%k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 +; SKX-NEXT: kshiftrd $22, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: movl $-1025, %edi ## imm = 0xFBFF +; SKX-NEXT: kmovd %edi, %k6 +; SKX-NEXT: kandd %k6, %k0, %k0 +; SKX-NEXT: kshiftld $31, %k1, %k1 +; SKX-NEXT: kshiftrd $21, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: movl $-2049, %edi ## imm = 0xF7FF +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 +; SKX-NEXT: kshiftrd $20, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: movl $-4097, %edi ## imm = 0xEFFF +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 +; SKX-NEXT: kshiftrd $19, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: movl $-8193, %edi ## imm = 0xDFFF +; SKX-NEXT: kmovd %edi, %k4 +; SKX-NEXT: kandd %k4, %k0, %k0 +; SKX-NEXT: kshiftld $31, %k1, %k1 +; SKX-NEXT: kshiftrd $18, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: movl $-16385, %edi ## imm = 0xBFFF +; SKX-NEXT: kmovd %edi, %k5 +; SKX-NEXT: kandd %k5, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 +; SKX-NEXT: kshiftrd $17, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: movl $-32769, %edi ## imm = 0xFFFF7FFF +; SKX-NEXT: kmovd %edi, %k3 +; SKX-NEXT: kandd %k3, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 +; SKX-NEXT: kshiftld $31, %k7, %k7 +; SKX-NEXT: kshiftrd $16, %k7, %k7 +; SKX-NEXT: kord %k7, %k0, %k7 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; SKX-NEXT: movl $-65537, %edi ## imm = 0xFFFEFFFF +; SKX-NEXT: kmovd %edi, %k2 +; SKX-NEXT: kandd %k2, %k7, %k7 +; SKX-NEXT: kshiftld $31, 
%k0, %k0 +; SKX-NEXT: kshiftrd $15, %k0, %k0 +; SKX-NEXT: kord %k0, %k7, %k0 +; SKX-NEXT: kmovd %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kmovd %esi, %k0 +; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovd %edx, %k7 +; SKX-NEXT: kshiftld $31, %k7, %k7 +; SKX-NEXT: kshiftrd $30, %k7, %k7 +; SKX-NEXT: kord %k7, %k0, %k0 +; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovd %ecx, %k7 +; SKX-NEXT: kshiftld $31, %k7, %k7 +; SKX-NEXT: kshiftrd $29, %k7, %k7 +; SKX-NEXT: kord %k7, %k0, %k0 +; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovd %r8d, %k7 +; SKX-NEXT: kshiftld $31, %k7, %k7 +; SKX-NEXT: kshiftrd $28, %k7, %k7 +; SKX-NEXT: kord %k7, %k0, %k0 +; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovd %r9d, %k7 +; SKX-NEXT: kshiftld $31, %k7, %k7 +; SKX-NEXT: kshiftrd $27, %k7, %k7 +; SKX-NEXT: kord %k7, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 +; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload +; SKX-NEXT: kandd %k1, %k0, %k1 +; SKX-NEXT: kshiftld $31, %k7, %k7 +; SKX-NEXT: kshiftrd $26, %k7, %k7 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; SKX-NEXT: kord %k7, %k1, %k1 +; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 4-byte Reload +; SKX-NEXT: kandd %k7, %k1, %k1 +; SKX-NEXT: kshiftld $31, %k0, %k0 +; SKX-NEXT: kshiftrd $25, %k0, %k0 +; SKX-NEXT: kord %k0, %k1, %k0 +; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $24, %k1, %k1 -; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kshiftld $31, %k3, %k2 -; SKX-NEXT: kshiftrd $23, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kshiftrd $10, %k1, %k2 -; SKX-NEXT: kshiftld $10, 
%k2, %k2 -; SKX-NEXT: kshiftld $23, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $23, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kshiftld $31, %k3, %k2 -; SKX-NEXT: kshiftrd $22, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kshiftrd $11, %k1, %k2 -; SKX-NEXT: kshiftld $11, %k2, %k2 -; SKX-NEXT: kshiftld $22, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $22, %k1, %k1 -; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $21, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kshiftrd $12, %k1, %k2 -; SKX-NEXT: kshiftld $12, %k2, %k2 -; SKX-NEXT: kshiftld $21, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kandd %k6, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $21, %k1, %k1 -; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $20, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kshiftrd $13, %k1, %k2 -; SKX-NEXT: kshiftld $13, %k2, %k2 -; SKX-NEXT: kshiftld $20, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 4-byte Reload +; SKX-NEXT: kandd %k6, %k0, %k0 +; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $20, %k1, %k1 -; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $19, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: 
kshiftrd $14, %k1, %k2 -; SKX-NEXT: kshiftld $14, %k2, %k2 -; SKX-NEXT: kshiftld $19, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kshiftld $31, %k6, %k1 ; SKX-NEXT: kshiftrd $19, %k1, %k1 -; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $18, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kshiftrd $15, %k1, %k2 -; SKX-NEXT: kshiftld $15, %k2, %k2 -; SKX-NEXT: kshiftld $18, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kandd %k4, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $18, %k1, %k1 -; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kshiftld $31, %k3, %k2 -; SKX-NEXT: kshiftrd $17, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kshiftrd $16, %k1, %k2 -; SKX-NEXT: kshiftld $16, %k2, %k2 -; SKX-NEXT: kshiftld $17, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kandd %k5, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $17, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kshiftld $31, %k3, %k2 -; SKX-NEXT: kshiftrd $16, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kshiftrd $17, %k1, %k2 -; SKX-NEXT: kshiftld $17, %k2, %k2 -; SKX-NEXT: kshiftld $16, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kandd %k3, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $16, %k1, %k1 -; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $15, %k2, %k2 -; SKX-NEXT: kord %k1, %k2, %k1 -; SKX-NEXT: kmovd %esi, %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $31, %k2, 
%k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kmovd %edx, %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $30, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kshiftrd $3, %k0, %k2 -; SKX-NEXT: kshiftld $3, %k2, %k2 -; SKX-NEXT: kshiftld $30, %k0, %k0 -; SKX-NEXT: kshiftrd $30, %k0, %k0 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kmovd %ecx, %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $29, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kshiftrd $4, %k0, %k2 -; SKX-NEXT: kshiftld $4, %k2, %k2 -; SKX-NEXT: kshiftld $29, %k0, %k0 -; SKX-NEXT: kshiftrd $29, %k0, %k0 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kmovd %r8d, %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $28, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kshiftrd $5, %k0, %k2 -; SKX-NEXT: kshiftld $5, %k2, %k2 -; SKX-NEXT: kshiftld $28, %k0, %k0 -; SKX-NEXT: kshiftrd $28, %k0, %k0 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kmovd %r9d, %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $27, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kshiftrd $6, %k0, %k2 -; SKX-NEXT: kshiftld $6, %k2, %k2 -; SKX-NEXT: kshiftld $27, %k0, %k0 -; SKX-NEXT: kshiftrd $27, %k0, %k0 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $26, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kshiftrd $7, %k0, %k2 -; SKX-NEXT: kshiftld $7, %k2, %k2 -; SKX-NEXT: kshiftld $26, %k0, %k0 -; SKX-NEXT: kshiftrd $26, %k0, %k0 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $25, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kshiftrd $8, %k0, %k2 -; SKX-NEXT: kshiftld $8, %k2, %k2 -; SKX-NEXT: kshiftld $25, %k0, %k0 -; SKX-NEXT: kshiftrd $25, %k0, %k0 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: 
kshiftrd $24, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kshiftrd $9, %k0, %k2 -; SKX-NEXT: kshiftld $9, %k2, %k2 -; SKX-NEXT: kshiftld $24, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kshiftrd $24, %k0, %k0 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kshiftld $31, %k3, %k2 -; SKX-NEXT: kshiftrd $23, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kshiftrd $10, %k0, %k2 -; SKX-NEXT: kshiftld $10, %k2, %k2 -; SKX-NEXT: kshiftld $23, %k0, %k0 -; SKX-NEXT: kshiftrd $23, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kshiftld $31, %k3, %k2 -; SKX-NEXT: kshiftrd $22, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kshiftrd $11, %k0, %k2 -; SKX-NEXT: kshiftld $11, %k2, %k2 -; SKX-NEXT: kshiftld $22, %k0, %k0 -; SKX-NEXT: kshiftrd $22, %k0, %k0 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $21, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kshiftrd $12, %k0, %k2 -; SKX-NEXT: kshiftld $12, %k2, %k2 -; SKX-NEXT: kshiftld $21, %k0, %k0 -; SKX-NEXT: kshiftrd $21, %k0, %k0 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $20, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kshiftrd $13, %k0, %k2 -; SKX-NEXT: kshiftld $13, %k2, %k2 -; SKX-NEXT: kshiftld $20, %k0, %k0 -; SKX-NEXT: kshiftrd $20, %k0, %k0 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $19, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kshiftrd $14, %k0, %k2 -; SKX-NEXT: kshiftld $14, %k2, %k2 -; SKX-NEXT: kshiftld $19, %k0, %k0 -; SKX-NEXT: kshiftrd $19, %k0, %k0 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $18, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: 
kshiftrd $15, %k0, %k2 -; SKX-NEXT: kshiftld $15, %k2, %k2 -; SKX-NEXT: kshiftld $18, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kshiftrd $18, %k0, %k0 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kshiftld $31, %k3, %k2 -; SKX-NEXT: kshiftrd $17, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kshiftrd $16, %k0, %k2 -; SKX-NEXT: kshiftld $16, %k2, %k2 -; SKX-NEXT: kshiftld $17, %k0, %k0 -; SKX-NEXT: kshiftrd $17, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kshiftld $31, %k3, %k2 -; SKX-NEXT: kshiftrd $16, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 -; SKX-NEXT: kshiftrd $17, %k0, %k2 -; SKX-NEXT: kshiftld $17, %k2, %k2 -; SKX-NEXT: kshiftld $16, %k0, %k0 -; SKX-NEXT: kshiftrd $16, %k0, %k0 -; SKX-NEXT: kord %k2, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $15, %k2, %k2 -; SKX-NEXT: kord %k0, %k2, %k0 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kandd %k2, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 +; SKX-NEXT: kshiftrd $15, %k1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload ; SKX-NEXT: kandd %k1, %k0, %k0 ; SKX-NEXT: kshiftrd $16, %k0, %k1 ; SKX-NEXT: kmovd %k1, %r8d @@ -1260,249 +1173,231 @@ ; KNL_X32-NEXT: pushl %edi ; KNL_X32-NEXT: pushl %esi ; KNL_X32-NEXT: subl $20, %esp +; KNL_X32-NEXT: movw $-3, %ax +; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $2, %k0, %k2 -; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $1, %k1, %k1 -; KNL_X32-NEXT: 
korw %k1, %k2, %k1 +; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_X32-NEXT: kshiftrw $14, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $14, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $3, %k0, %k2 -; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill +; KNL_X32-NEXT: movw $-5, %ax +; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $2, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k2, %k1 +; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_X32-NEXT: kshiftrw $13, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $13, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $13, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $4, %k0, %k2 -; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill +; KNL_X32-NEXT: movw $-9, %ax +; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $3, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k2, %k1 +; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_X32-NEXT: kshiftrw $12, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $12, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $12, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $5, %k0, %k2 -; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill +; KNL_X32-NEXT: movw $-17, %ax +; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $4, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k2, %k1 +; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_X32-NEXT: kshiftrw $11, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, 
%k0 -; KNL_X32-NEXT: kshiftlw $11, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $11, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $6, %k0, %k2 -; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill +; KNL_X32-NEXT: movw $-33, %ax +; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $5, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k2, %k1 +; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_X32-NEXT: kshiftrw $10, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $10, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $10, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $7, %k0, %k2 -; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill +; KNL_X32-NEXT: movw $-65, %ax +; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $6, %k1, %k1 -; KNL_X32-NEXT: korw %k1, %k2, %k1 +; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_X32-NEXT: kshiftrw $9, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $9, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $9, %k0, %k0 +; KNL_X32-NEXT: movw $-129, %ax +; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $7, %k1, %k1 -; KNL_X32-NEXT: kshiftlw $8, %k0, %k2 -; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: korw %k1, %k2, %k1 +; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_X32-NEXT: kshiftrw $8, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $8, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $8, %k0, %k0 +; KNL_X32-NEXT: movw $-257, %ax ## imm = 0xFEFF 
+; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $8, %k1, %k1 -; KNL_X32-NEXT: kshiftlw $9, %k0, %k2 -; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: korw %k1, %k2, %k1 +; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_X32-NEXT: kshiftrw $7, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $7, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $7, %k0, %k0 +; KNL_X32-NEXT: movw $-513, %ax ## imm = 0xFDFF +; KNL_X32-NEXT: kmovw %eax, %k5 +; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $9, %k1, %k1 -; KNL_X32-NEXT: kshiftlw $10, %k0, %k2 -; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: korw %k1, %k2, %k1 +; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_X32-NEXT: kshiftrw $6, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $6, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $6, %k0, %k0 +; KNL_X32-NEXT: movw $-1025, %ax ## imm = 0xFBFF +; KNL_X32-NEXT: kmovw %eax, %k4 +; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $10, %k1, %k1 -; KNL_X32-NEXT: kshiftlw $11, %k0, %k6 -; KNL_X32-NEXT: korw %k1, %k6, %k1 +; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_X32-NEXT: kshiftrw $5, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $5, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $5, %k0, %k0 +; KNL_X32-NEXT: movw $-2049, %ax ## imm = 0xF7FF +; KNL_X32-NEXT: kmovw %eax, %k3 +; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $11, %k1, %k1 -; KNL_X32-NEXT: kshiftlw $12, %k0, %k5 -; KNL_X32-NEXT: korw %k1, %k5, %k1 +; KNL_X32-NEXT: kshiftlw 
$15, %k1, %k1 +; KNL_X32-NEXT: kshiftrw $4, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $4, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $4, %k0, %k0 +; KNL_X32-NEXT: movw $-4097, %ax ## imm = 0xEFFF +; KNL_X32-NEXT: kmovw %eax, %k2 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $12, %k1, %k1 -; KNL_X32-NEXT: kshiftlw $13, %k0, %k4 -; KNL_X32-NEXT: korw %k1, %k4, %k1 +; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_X32-NEXT: kshiftrw $3, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $3, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $3, %k0, %k2 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_X32-NEXT: movw $-8193, %ax ## imm = 0xDFFF ; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kshiftlw $13, %k1, %k0 -; KNL_X32-NEXT: kshiftlw $14, %k0, %k3 -; KNL_X32-NEXT: korw %k0, %k3, %k0 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftlw $2, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $2, %k0, %k2 +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $2, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k6 +; KNL_X32-NEXT: movw $-16385, %ax ## imm = 0xBFFF ; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kshiftlw $14, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k1 -; KNL_X32-NEXT: korw %k0, %k1, %k0 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftlw $1, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $1, %k0, %k0 +; KNL_X32-NEXT: kandw %k0, %k6, %k6 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: korw %k2, %k0, %k0 -; KNL_X32-NEXT: kmovw %k0, (%esp) ## 2-byte Spill +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $14, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k6, %k6 +; KNL_X32-NEXT: kshiftlw $1, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $1, %k6, %k6 ; 
KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kshiftlw $1, %k0, %k0 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload -; KNL_X32-NEXT: korw %k0, %k2, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k6, %k6 +; KNL_X32-NEXT: kmovw %k6, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $15, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload +; KNL_X32-NEXT: kandw %k7, %k6, %k6 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $2, %k2, %k2 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k6, %k6 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: korw %k2, %k7, %k2 -; KNL_X32-NEXT: kshiftlw $14, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kandw %k7, %k6, %k6 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $3, %k2, %k2 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k6, %k6 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: korw %k2, %k7, %k2 -; KNL_X32-NEXT: kshiftlw $13, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $13, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kandw %k7, %k6, %k6 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $4, %k2, %k2 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $12, %k7, 
%k7 +; KNL_X32-NEXT: korw %k7, %k6, %k6 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: korw %k2, %k7, %k2 -; KNL_X32-NEXT: kshiftlw $12, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $12, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kandw %k7, %k6, %k6 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $5, %k2, %k2 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k6, %k6 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: korw %k2, %k7, %k2 -; KNL_X32-NEXT: kshiftlw $11, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $11, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kandw %k7, %k6, %k6 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $6, %k2, %k2 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k6, %k6 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: korw %k2, %k7, %k2 -; KNL_X32-NEXT: kshiftlw $10, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $10, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kandw %k7, %k6, %k6 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $7, %k2, %k2 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k6, %k6 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: korw %k2, %k7, %k2 -; KNL_X32-NEXT: kshiftlw $9, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $9, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kandw %k7, %k6, %k6 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $8, %k2, %k2 +; KNL_X32-NEXT: kmovw 
%eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $8, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k6, %k6 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: korw %k2, %k7, %k2 -; KNL_X32-NEXT: kshiftlw $8, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $8, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kandw %k7, %k6, %k6 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $9, %k2, %k2 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: korw %k2, %k7, %k2 -; KNL_X32-NEXT: kshiftlw $7, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $7, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $7, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k6, %k6 +; KNL_X32-NEXT: kandw %k5, %k6, %k5 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $10, %k2, %k2 -; KNL_X32-NEXT: korw %k2, %k6, %k2 -; KNL_X32-NEXT: kshiftlw $6, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $6, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $6, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k5, %k5 +; KNL_X32-NEXT: kandw %k4, %k5, %k4 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $11, %k2, %k2 -; KNL_X32-NEXT: korw %k2, %k5, %k2 -; KNL_X32-NEXT: kshiftlw $5, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $5, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k5 +; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 +; KNL_X32-NEXT: kshiftrw $5, %k5, %k5 +; KNL_X32-NEXT: korw %k5, %k4, %k4 +; KNL_X32-NEXT: kandw %k3, %k4, %k3 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $12, %k2, %k2 -; KNL_X32-NEXT: korw %k2, %k4, %k2 -; KNL_X32-NEXT: kshiftlw $4, %k0, %k0 -; 
KNL_X32-NEXT: kshiftrw $4, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k4 +; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 +; KNL_X32-NEXT: kshiftrw $4, %k4, %k4 +; KNL_X32-NEXT: korw %k4, %k3, %k3 +; KNL_X32-NEXT: kandw %k2, %k3, %k2 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $13, %k2, %k2 -; KNL_X32-NEXT: korw %k2, %k3, %k2 -; KNL_X32-NEXT: kshiftlw $3, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $3, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k3 +; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 +; KNL_X32-NEXT: kshiftrw $3, %k3, %k3 +; KNL_X32-NEXT: korw %k3, %k2, %k2 +; KNL_X32-NEXT: kandw %k1, %k2, %k1 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $14, %k2, %k2 +; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 +; KNL_X32-NEXT: kshiftrw $2, %k2, %k2 ; KNL_X32-NEXT: korw %k2, %k1, %k1 ; KNL_X32-NEXT: xorl %eax, %eax ; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp) ; KNL_X32-NEXT: movl $65535, %ecx ## imm = 0xFFFF ; KNL_X32-NEXT: movl $0, %edx ; KNL_X32-NEXT: cmovnel %ecx, %edx -; KNL_X32-NEXT: kshiftlw $2, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $2, %k0, %k0 +; KNL_X32-NEXT: kandw %k0, %k1, %k0 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %bl +; KNL_X32-NEXT: kmovw %ebx, %k1 +; KNL_X32-NEXT: kshiftlw $14, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 ; KNL_X32-NEXT: kshiftlw $1, %k0, %k0 ; KNL_X32-NEXT: kshiftrw $1, %k0, %k0 @@ -1513,7 +1408,7 @@ ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp) ; KNL_X32-NEXT: cmovnel %ecx, %eax -; KNL_X32-NEXT: kmovw (%esp), %k2 ## 2-byte Reload +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload ; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kandw %k1, %k2, %k1 @@ -1610,550 +1505,373 @@ ; KNL-LABEL: test17: ; KNL: ## %bb.0: ; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k0 -; 
KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kshiftlw $2, %k0, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $14, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k2 -; KNL-NEXT: kshiftlw $3, %k2, %k2 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $13, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k2 -; KNL-NEXT: kshiftlw $4, %k2, %k2 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: movw $-3, %di ; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $12, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k2 -; KNL-NEXT: kshiftlw $5, %k2, %k2 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $11, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k2 -; KNL-NEXT: kshiftlw $6, %k2, %k2 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $10, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k2 -; KNL-NEXT: kshiftlw $7, %k2, %k2 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $15, 
%k2, %k2 -; KNL-NEXT: kshiftrw $9, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $14, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k2 -; KNL-NEXT: kshiftlw $3, %k2, %k2 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $13, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k2 -; KNL-NEXT: kshiftlw $4, %k2, %k2 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $12, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k2 -; KNL-NEXT: kshiftlw $5, %k2, %k2 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $11, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k2 -; KNL-NEXT: kshiftlw $6, %k2, %k2 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $10, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k2 -; KNL-NEXT: kshiftlw $7, %k2, %k2 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: 
kshiftrw $10, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $9, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $14, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k3 -; KNL-NEXT: kshiftlw $15, %k3, %k3 -; KNL-NEXT: kshiftrw $14, %k3, %k3 -; KNL-NEXT: korw %k0, %k3, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k3 -; KNL-NEXT: kshiftlw $3, %k3, %k3 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: korw %k3, %k0, %k0 +; KNL-NEXT: movw $-5, %di +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k3 ; KNL-NEXT: kshiftlw $15, %k3, %k3 ; KNL-NEXT: kshiftrw $13, %k3, %k3 -; KNL-NEXT: korw %k0, %k3, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k3 -; KNL-NEXT: kshiftlw $4, %k3, %k3 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 -; KNL-NEXT: korw %k3, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k3 -; KNL-NEXT: kshiftlw $15, %k3, %k3 -; KNL-NEXT: kshiftrw $12, %k3, %k3 -; KNL-NEXT: korw %k0, %k3, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k3 -; KNL-NEXT: kshiftlw $5, %k3, %k3 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: korw %k3, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k3 -; KNL-NEXT: kshiftlw $15, %k3, %k3 -; KNL-NEXT: kshiftrw $11, %k3, %k3 -; KNL-NEXT: korw %k0, %k3, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k3 -; KNL-NEXT: kshiftlw $6, %k3, %k3 -; KNL-NEXT: kshiftlw $11, 
%k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 -; KNL-NEXT: korw %k3, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k3 -; KNL-NEXT: kshiftlw $15, %k3, %k3 -; KNL-NEXT: kshiftrw $10, %k3, %k3 -; KNL-NEXT: korw %k0, %k3, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k3 -; KNL-NEXT: kshiftlw $7, %k3, %k3 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 ; KNL-NEXT: korw %k3, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: movw $-9, %di ; KNL-NEXT: kmovw %edi, %k3 -; KNL-NEXT: kshiftlw $15, %k3, %k3 -; KNL-NEXT: kshiftrw $9, %k3, %k3 -; KNL-NEXT: korw %k0, %k3, %k3 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k4 -; KNL-NEXT: kshiftlw $15, %k4, %k4 -; KNL-NEXT: kshiftrw $14, %k4, %k4 -; KNL-NEXT: korw %k0, %k4, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k4 -; KNL-NEXT: kshiftlw $3, %k4, %k4 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: korw %k4, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k4 -; KNL-NEXT: kshiftlw $15, %k4, %k4 -; KNL-NEXT: kshiftrw $13, %k4, %k4 -; KNL-NEXT: korw %k0, %k4, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k4 -; KNL-NEXT: kshiftlw $4, %k4, %k4 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 -; KNL-NEXT: korw %k4, %k0, %k0 +; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k4 ; KNL-NEXT: kshiftlw $15, %k4, %k4 ; KNL-NEXT: kshiftrw $12, %k4, %k4 -; KNL-NEXT: korw %k0, %k4, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k4 -; KNL-NEXT: kshiftlw $5, %k4, %k4 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: korw %k4, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k4 -; KNL-NEXT: kshiftlw $15, %k4, %k4 -; KNL-NEXT: kshiftrw 
$11, %k4, %k4 -; KNL-NEXT: korw %k0, %k4, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k4 -; KNL-NEXT: kshiftlw $6, %k4, %k4 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 -; KNL-NEXT: korw %k4, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k4 -; KNL-NEXT: kshiftlw $15, %k4, %k4 -; KNL-NEXT: kshiftrw $10, %k4, %k4 -; KNL-NEXT: korw %k0, %k4, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k4 -; KNL-NEXT: kshiftlw $7, %k4, %k4 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 ; KNL-NEXT: korw %k4, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: movw $-17, %di ; KNL-NEXT: kmovw %edi, %k4 -; KNL-NEXT: kshiftlw $15, %k4, %k4 -; KNL-NEXT: kshiftrw $9, %k4, %k4 -; KNL-NEXT: korw %k0, %k4, %k4 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $14, %k5, %k5 -; KNL-NEXT: korw %k0, %k5, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k5 -; KNL-NEXT: kshiftlw $3, %k5, %k5 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: korw %k5, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $13, %k5, %k5 -; KNL-NEXT: korw %k0, %k5, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k5 -; KNL-NEXT: kshiftlw $4, %k5, %k5 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 -; KNL-NEXT: korw %k5, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $12, %k5, %k5 -; KNL-NEXT: korw %k0, %k5, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k5 -; KNL-NEXT: kshiftlw $5, %k5, %k5 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: korw %k5, %k0, %k0 +; KNL-NEXT: 
kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k5 ; KNL-NEXT: kshiftlw $15, %k5, %k5 ; KNL-NEXT: kshiftrw $11, %k5, %k5 -; KNL-NEXT: korw %k0, %k5, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k5 -; KNL-NEXT: kshiftlw $6, %k5, %k5 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 -; KNL-NEXT: korw %k5, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $10, %k5, %k5 -; KNL-NEXT: korw %k0, %k5, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k5 -; KNL-NEXT: kshiftlw $7, %k5, %k5 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 ; KNL-NEXT: korw %k5, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: movw $-33, %di ; KNL-NEXT: kmovw %edi, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $9, %k5, %k5 -; KNL-NEXT: korw %k0, %k5, %k5 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: korw %k1, %k0, %k0 +; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $14, %k6, %k6 -; KNL-NEXT: korw %k0, %k6, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k6 -; KNL-NEXT: kshiftlw $3, %k6, %k6 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kshiftrw $10, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: movw $-65, %di ; KNL-NEXT: kmovw %edi, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $13, %k6, %k6 -; KNL-NEXT: korw %k0, %k6, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k6 -; KNL-NEXT: kshiftlw $4, %k6, %k6 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 -; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kandw %k6, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k6 -; KNL-NEXT: kshiftlw 
$15, %k6, %k6 -; KNL-NEXT: kshiftrw $12, %k6, %k6 -; KNL-NEXT: korw %k0, %k6, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k6 -; KNL-NEXT: kshiftlw $5, %k6, %k6 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $9, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $11, %k6, %k6 -; KNL-NEXT: korw %k0, %k6, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k6 -; KNL-NEXT: kshiftlw $6, %k6, %k6 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 -; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $10, %k6, %k6 -; KNL-NEXT: korw %k0, %k6, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k6 -; KNL-NEXT: kshiftlw $7, %k6, %k6 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 -; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $9, %k6, %k6 -; KNL-NEXT: korw %k0, %k6, %k6 +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $13, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k3, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $12, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k4, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; 
KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $11, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k5, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $10, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k6, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $9, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: korw %k1, %k0, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $14, %k7, %k7 -; KNL-NEXT: korw %k0, %k7, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k7 -; KNL-NEXT: kshiftlw $3, %k7, %k7 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 ; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $13, %k7, %k7 -; KNL-NEXT: korw %k0, %k7, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k7 -; KNL-NEXT: kshiftlw $4, %k7, %k7 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 ; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $12, %k7, %k7 -; KNL-NEXT: korw %k0, %k7, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k7 -; KNL-NEXT: kshiftlw $5, %k7, %k7 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 ; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k7 ; KNL-NEXT: 
kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $11, %k7, %k7 -; KNL-NEXT: korw %k0, %k7, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k7 -; KNL-NEXT: kshiftlw $6, %k7, %k7 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 ; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $10, %k7, %k7 -; KNL-NEXT: korw %k0, %k7, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k7 -; KNL-NEXT: kshiftlw $7, %k7, %k7 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 ; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k6, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k0, %k7, %k7 -; KNL-NEXT: kmovw %esi, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kmovw %edx, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $14, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k2 -; KNL-NEXT: kshiftlw $3, %k2, %k2 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kmovw %ecx, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $13, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k2 -; KNL-NEXT: kshiftlw $4, %k2, %k2 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kmovw %r8d, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $12, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k2 -; KNL-NEXT: kshiftlw $5, %k2, %k2 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kmovw %r9d, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $11, %k2, %k2 -; 
KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k2 -; KNL-NEXT: kshiftlw $6, %k2, %k2 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $10, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k2 -; KNL-NEXT: kshiftlw $7, %k2, %k2 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 -; KNL-NEXT: korw %k2, %k0, %k0 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $13, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k3, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $12, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k4, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $11, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k5, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $10, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k6, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $9, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %k0, 
{{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $13, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k3, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $12, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k4, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $11, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k5, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $10, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k6, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $9, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $13, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k3, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, 
%k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $12, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k4, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $11, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k5, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $10, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k6, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $9, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $13, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k3, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $12, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k4, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $11, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k5, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $10, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k6, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, 
%k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $9, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kmovw %esi, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 +; KNL-NEXT: kmovw %edx, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %ecx, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $13, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k3, %k0, %k0 +; KNL-NEXT: kmovw %r8d, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $12, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k4, %k0, %k0 +; KNL-NEXT: kmovw %r9d, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $11, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $9, %k2, %k2 -; KNL-NEXT: korw %k0, %k2, %k0 +; KNL-NEXT: kmovw %ecx, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $10, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k6, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kmovw %ecx, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $9, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $14, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: kshiftrw $3, %k1, %k2 -; KNL-NEXT: kshiftlw $3, %k2, %k2 -; KNL-NEXT: kshiftlw $14, %k1, %k1 -; KNL-NEXT: kshiftrw $14, %k1, %k1 -; KNL-NEXT: korw %k2, %k1, %k1 +; KNL-NEXT: kmovw %ecx, %k7 +; KNL-NEXT: kandw %k2, %k7, %k2 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl +; KNL-NEXT: 
kmovw %ecx, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k2, %k2 +; KNL-NEXT: kandw %k1, %k2, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $13, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: kshiftrw $4, %k1, %k2 -; KNL-NEXT: kshiftlw $4, %k2, %k2 -; KNL-NEXT: kshiftlw $13, %k1, %k1 -; KNL-NEXT: kshiftrw $13, %k1, %k1 ; KNL-NEXT: korw %k2, %k1, %k1 +; KNL-NEXT: kandw %k3, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $12, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: kshiftrw $5, %k1, %k2 -; KNL-NEXT: kshiftlw $5, %k2, %k2 -; KNL-NEXT: kshiftlw $12, %k1, %k1 -; KNL-NEXT: kshiftrw $12, %k1, %k1 ; KNL-NEXT: korw %k2, %k1, %k1 +; KNL-NEXT: kandw %k4, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $11, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: kshiftrw $6, %k1, %k2 -; KNL-NEXT: kshiftlw $6, %k2, %k2 -; KNL-NEXT: kshiftlw $11, %k1, %k1 -; KNL-NEXT: kshiftrw $11, %k1, %k1 ; KNL-NEXT: korw %k2, %k1, %k1 +; KNL-NEXT: kandw %k5, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $10, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: kshiftrw $7, %k1, %k2 -; KNL-NEXT: kshiftlw $7, %k2, %k2 -; KNL-NEXT: kshiftlw $10, %k1, %k1 -; KNL-NEXT: kshiftrw $10, %k1, %k1 ; KNL-NEXT: korw %k2, %k1, %k1 +; KNL-NEXT: kandw %k6, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $9, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: korw %k2, %k1, %k1 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: 
kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kandw %k7, %k0, %k0 -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload @@ -2196,491 +1914,339 @@ ; SKX-LABEL: test17: ; SKX: ## %bb.0: ; SKX-NEXT: movq %rdi, %rax +; SKX-NEXT: movb $-3, %dil +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; SKX-NEXT: kshiftlb $7, %k0, %k0 -; SKX-NEXT: kshiftrb $7, %k0, %k1 -; SKX-NEXT: kshiftlb $2, %k0, %k0 -; SKX-NEXT: korb %k0, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $6, %k2, %k2 -; SKX-NEXT: korb %k1, %k2, %k1 -; SKX-NEXT: kshiftrb $3, %k1, %k2 -; SKX-NEXT: kshiftlb $3, %k2, %k2 -; SKX-NEXT: kshiftlb $6, %k1, %k1 +; SKX-NEXT: kandb %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $6, %k1, %k1 -; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: movb $-5, %dil +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; SKX-NEXT: kandb %k1, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 ; SKX-NEXT: kshiftlb $7, %k2, %k2 ; SKX-NEXT: kshiftrb $5, %k2, %k2 -; SKX-NEXT: korb %k1, %k2, %k1 -; SKX-NEXT: kshiftrb $4, %k1, %k2 -; SKX-NEXT: kshiftlb $4, %k2, %k2 -; SKX-NEXT: kshiftlb $5, %k1, %k1 -; SKX-NEXT: kshiftrb $5, %k1, %k1 -; 
SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $4, %k2, %k2 -; SKX-NEXT: korb %k1, %k2, %k1 -; SKX-NEXT: kshiftrb $5, %k1, %k2 -; SKX-NEXT: kshiftlb $5, %k2, %k2 -; SKX-NEXT: kshiftlb $4, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kshiftrb $4, %k1, %k1 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k3, %k2 -; SKX-NEXT: kshiftrb $3, %k2, %k2 -; SKX-NEXT: korb %k1, %k2, %k1 -; SKX-NEXT: kshiftrb $6, %k1, %k2 -; SKX-NEXT: kshiftlb $6, %k2, %k2 -; SKX-NEXT: kshiftlb $3, %k1, %k1 -; SKX-NEXT: kshiftrb $3, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k3, %k2 -; SKX-NEXT: kshiftrb $2, %k2, %k2 -; SKX-NEXT: korb %k1, %k2, %k1 -; SKX-NEXT: kshiftrb $7, %k1, %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftlb $2, %k1, %k1 -; SKX-NEXT: kshiftrb $2, %k1, %k1 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $1, %k2, %k2 -; SKX-NEXT: korb %k1, %k2, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $7, %k2, %k2 -; SKX-NEXT: korb %k0, %k2, %k2 +; SKX-NEXT: korb %k2, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 +; SKX-NEXT: movb $-9, %dil +; SKX-NEXT: kmovd %edi, %k7 +; SKX-NEXT: kandb %k7, %k0, %k0 ; SKX-NEXT: kshiftlb $7, %k3, %k3 -; SKX-NEXT: kshiftrb $6, %k3, %k3 -; SKX-NEXT: korb %k2, %k3, %k2 -; SKX-NEXT: kshiftrb $3, %k2, %k3 -; SKX-NEXT: kshiftlb $3, %k3, %k3 -; SKX-NEXT: kshiftlb $6, %k2, %k2 -; SKX-NEXT: kshiftrb $6, %k2, %k2 -; SKX-NEXT: korb %k3, %k2, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kshiftlb $7, %k3, %k3 -; SKX-NEXT: kshiftrb $5, %k3, %k3 -; SKX-NEXT: korb %k2, %k3, %k2 -; SKX-NEXT: kshiftrb $4, %k2, %k3 -; SKX-NEXT: kshiftlb $4, %k3, %k3 -; SKX-NEXT: kshiftlb $5, %k2, %k2 -; SKX-NEXT: kshiftrb $5, %k2, %k2 -; SKX-NEXT: korb 
%k3, %k2, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kshiftlb $7, %k3, %k3 -; SKX-NEXT: kshiftrb $4, %k3, %k3 -; SKX-NEXT: korb %k2, %k3, %k2 -; SKX-NEXT: kshiftrb $5, %k2, %k3 -; SKX-NEXT: kshiftlb $5, %k3, %k3 -; SKX-NEXT: kshiftlb $4, %k2, %k2 -; SKX-NEXT: kshiftrb $4, %k2, %k2 -; SKX-NEXT: korb %k3, %k2, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kshiftlb $7, %k3, %k3 -; SKX-NEXT: kshiftrb $3, %k3, %k3 -; SKX-NEXT: korb %k2, %k3, %k2 -; SKX-NEXT: kshiftrb $6, %k2, %k3 -; SKX-NEXT: kshiftlb $6, %k3, %k3 -; SKX-NEXT: kshiftlb $3, %k2, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: kshiftrb $3, %k2, %k2 -; SKX-NEXT: korb %k3, %k2, %k2 -; SKX-NEXT: kshiftlb $7, %k4, %k3 -; SKX-NEXT: kshiftrb $2, %k3, %k3 -; SKX-NEXT: korb %k2, %k3, %k2 -; SKX-NEXT: kshiftrb $7, %k2, %k3 -; SKX-NEXT: kshiftlb $7, %k3, %k3 -; SKX-NEXT: kshiftlb $2, %k2, %k2 -; SKX-NEXT: kshiftrb $2, %k2, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: korb %k3, %k2, %k2 -; SKX-NEXT: kshiftlb $7, %k4, %k3 -; SKX-NEXT: kshiftrb $1, %k3, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: korb %k2, %k3, %k2 -; SKX-NEXT: kandb %k1, %k2, %k1 -; SKX-NEXT: kshiftlb $7, %k4, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kshiftrb $7, %k2, %k2 -; SKX-NEXT: korb %k0, %k2, %k2 -; SKX-NEXT: kshiftlb $7, %k3, %k3 -; SKX-NEXT: kshiftrb $6, %k3, %k3 -; SKX-NEXT: korb %k2, %k3, %k2 -; SKX-NEXT: kshiftrb $3, %k2, %k3 -; SKX-NEXT: kshiftlb $3, %k3, %k3 -; SKX-NEXT: kshiftlb $6, %k2, %k2 -; SKX-NEXT: kshiftrb $6, %k2, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: korb %k3, %k2, %k2 -; SKX-NEXT: kshiftlb $7, %k4, %k3 -; SKX-NEXT: kshiftrb $5, %k3, %k3 -; SKX-NEXT: korb %k2, %k3, %k2 -; SKX-NEXT: kshiftrb $4, %k2, %k3 -; SKX-NEXT: kshiftlb $4, %k3, %k3 -; SKX-NEXT: kshiftlb $5, %k2, %k2 -; SKX-NEXT: kshiftrb $5, %k2, %k2 -; SKX-NEXT: korb %k3, %k2, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kshiftlb $7, %k3, %k3 -; SKX-NEXT: 
kshiftrb $4, %k3, %k3 -; SKX-NEXT: korb %k2, %k3, %k2 -; SKX-NEXT: kshiftrb $5, %k2, %k3 -; SKX-NEXT: kshiftlb $5, %k3, %k3 -; SKX-NEXT: kshiftlb $4, %k2, %k2 -; SKX-NEXT: kshiftrb $4, %k2, %k2 -; SKX-NEXT: korb %k3, %k2, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kshiftlb $7, %k3, %k3 -; SKX-NEXT: kshiftrb $3, %k3, %k3 -; SKX-NEXT: korb %k2, %k3, %k2 -; SKX-NEXT: kshiftrb $6, %k2, %k3 -; SKX-NEXT: kshiftlb $6, %k3, %k3 -; SKX-NEXT: kshiftlb $3, %k2, %k2 -; SKX-NEXT: kshiftrb $3, %k2, %k2 -; SKX-NEXT: korb %k3, %k2, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kshiftlb $7, %k3, %k3 -; SKX-NEXT: kshiftrb $2, %k3, %k3 -; SKX-NEXT: korb %k2, %k3, %k2 -; SKX-NEXT: kshiftrb $7, %k2, %k3 -; SKX-NEXT: kshiftlb $7, %k3, %k3 -; SKX-NEXT: kshiftlb $2, %k2, %k2 -; SKX-NEXT: kshiftrb $2, %k2, %k2 -; SKX-NEXT: korb %k3, %k2, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kshiftlb $7, %k3, %k3 -; SKX-NEXT: kshiftrb $1, %k3, %k3 -; SKX-NEXT: korb %k2, %k3, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: kshiftlb $7, %k3, %k3 -; SKX-NEXT: kshiftrb $7, %k3, %k3 -; SKX-NEXT: korb %k0, %k3, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: kshiftlb $7, %k4, %k4 -; SKX-NEXT: kshiftrb $6, %k4, %k4 -; SKX-NEXT: korb %k3, %k4, %k3 -; SKX-NEXT: kshiftrb $3, %k3, %k4 -; SKX-NEXT: kshiftlb $3, %k4, %k4 -; SKX-NEXT: kshiftlb $6, %k3, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftrb $6, %k3, %k3 -; SKX-NEXT: korb %k4, %k3, %k3 -; SKX-NEXT: kshiftlb $7, %k5, %k4 -; SKX-NEXT: kshiftrb $5, %k4, %k4 -; SKX-NEXT: korb %k3, %k4, %k3 -; SKX-NEXT: kshiftrb $4, %k3, %k4 -; SKX-NEXT: kshiftlb $4, %k4, %k4 -; SKX-NEXT: kshiftlb $5, %k3, %k3 -; SKX-NEXT: kshiftrb $5, %k3, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: korb %k4, %k3, %k3 -; SKX-NEXT: kshiftlb $7, %k5, %k4 -; SKX-NEXT: kshiftrb $4, %k4, %k4 -; SKX-NEXT: korb %k3, %k4, %k3 -; SKX-NEXT: kshiftrb $5, %k3, %k4 -; SKX-NEXT: kshiftlb $5, %k4, %k4 -; SKX-NEXT: 
kshiftlb $4, %k3, %k3 -; SKX-NEXT: kshiftrb $4, %k3, %k3 -; SKX-NEXT: korb %k4, %k3, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: kshiftlb $7, %k4, %k4 -; SKX-NEXT: kshiftrb $3, %k4, %k4 -; SKX-NEXT: korb %k3, %k4, %k3 -; SKX-NEXT: kshiftrb $6, %k3, %k4 -; SKX-NEXT: kshiftlb $6, %k4, %k4 -; SKX-NEXT: kshiftlb $3, %k3, %k3 -; SKX-NEXT: kshiftrb $3, %k3, %k3 -; SKX-NEXT: korb %k4, %k3, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: kshiftlb $7, %k4, %k4 -; SKX-NEXT: kshiftrb $2, %k4, %k4 -; SKX-NEXT: korb %k3, %k4, %k3 -; SKX-NEXT: kshiftrb $7, %k3, %k4 -; SKX-NEXT: kshiftlb $7, %k4, %k4 -; SKX-NEXT: kshiftlb $2, %k3, %k3 -; SKX-NEXT: kshiftrb $2, %k3, %k3 -; SKX-NEXT: korb %k4, %k3, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: kshiftlb $7, %k4, %k4 -; SKX-NEXT: kshiftrb $1, %k4, %k4 -; SKX-NEXT: korb %k3, %k4, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: kshiftlb $7, %k4, %k4 -; SKX-NEXT: kshiftrb $7, %k4, %k4 -; SKX-NEXT: korb %k0, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $6, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $3, %k4, %k5 -; SKX-NEXT: kshiftlb $3, %k5, %k5 -; SKX-NEXT: kshiftlb $6, %k4, %k4 -; SKX-NEXT: kshiftrb $6, %k4, %k4 -; SKX-NEXT: korb %k5, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $5, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $4, %k4, %k5 -; SKX-NEXT: kshiftlb $4, %k5, %k5 -; SKX-NEXT: kshiftlb $5, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 -; SKX-NEXT: kshiftrb $5, %k4, %k4 -; SKX-NEXT: korb %k5, %k4, %k4 -; SKX-NEXT: kshiftlb $7, %k6, %k5 -; SKX-NEXT: kshiftrb $4, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $5, %k4, %k5 -; SKX-NEXT: kshiftlb $5, %k5, %k5 -; SKX-NEXT: kshiftlb $4, %k4, %k4 -; SKX-NEXT: kshiftrb $4, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 -; SKX-NEXT: korb %k5, %k4, %k4 -; 
SKX-NEXT: kshiftlb $7, %k6, %k5 -; SKX-NEXT: kshiftrb $3, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $6, %k4, %k5 -; SKX-NEXT: kshiftlb $6, %k5, %k5 -; SKX-NEXT: kshiftlb $3, %k4, %k4 -; SKX-NEXT: kshiftrb $3, %k4, %k4 -; SKX-NEXT: korb %k5, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $2, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $7, %k4, %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftlb $2, %k4, %k4 -; SKX-NEXT: kshiftrb $2, %k4, %k4 -; SKX-NEXT: korb %k5, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $1, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kandb %k3, %k4, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: kandb %k2, %k3, %k2 -; SKX-NEXT: kshiftlb $7, %k4, %k3 -; SKX-NEXT: kshiftrb $7, %k3, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: korb %k0, %k3, %k3 -; SKX-NEXT: kshiftlb $7, %k4, %k4 -; SKX-NEXT: kshiftrb $6, %k4, %k4 -; SKX-NEXT: korb %k3, %k4, %k3 -; SKX-NEXT: kshiftrb $3, %k3, %k4 -; SKX-NEXT: kshiftlb $3, %k4, %k4 -; SKX-NEXT: kshiftlb $6, %k3, %k3 -; SKX-NEXT: kshiftrb $6, %k3, %k3 -; SKX-NEXT: korb %k4, %k3, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: kshiftlb $7, %k4, %k4 -; SKX-NEXT: kshiftrb $5, %k4, %k4 -; SKX-NEXT: korb %k3, %k4, %k3 -; SKX-NEXT: kshiftrb $4, %k3, %k4 -; SKX-NEXT: kshiftlb $4, %k4, %k4 -; SKX-NEXT: kshiftlb $5, %k3, %k3 -; SKX-NEXT: kshiftrb $5, %k3, %k3 -; SKX-NEXT: korb %k4, %k3, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: kshiftlb $7, %k4, %k4 -; SKX-NEXT: kshiftrb $4, %k4, %k4 -; SKX-NEXT: korb %k3, %k4, %k3 -; SKX-NEXT: kshiftrb $5, %k3, %k4 -; SKX-NEXT: kshiftlb $5, %k4, %k4 -; SKX-NEXT: kshiftlb $4, %k3, %k3 ; SKX-NEXT: kshiftrb $4, %k3, %k3 -; SKX-NEXT: korb %k4, %k3, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: kshiftlb $7, %k4, %k4 -; SKX-NEXT: kshiftrb $3, %k4, %k4 -; 
SKX-NEXT: korb %k3, %k4, %k3 -; SKX-NEXT: kshiftrb $6, %k3, %k4 -; SKX-NEXT: kshiftlb $6, %k4, %k4 -; SKX-NEXT: kshiftlb $3, %k3, %k3 -; SKX-NEXT: kshiftrb $3, %k3, %k3 -; SKX-NEXT: korb %k4, %k3, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 -; SKX-NEXT: kshiftlb $7, %k4, %k4 -; SKX-NEXT: kshiftrb $2, %k4, %k4 -; SKX-NEXT: korb %k3, %k4, %k3 -; SKX-NEXT: kshiftrb $7, %k3, %k4 -; SKX-NEXT: kshiftlb $7, %k4, %k4 -; SKX-NEXT: kshiftlb $2, %k3, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftrb $2, %k3, %k3 -; SKX-NEXT: korb %k4, %k3, %k3 -; SKX-NEXT: kshiftlb $7, %k5, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftrb $1, %k4, %k4 -; SKX-NEXT: korb %k3, %k4, %k3 -; SKX-NEXT: kshiftlb $7, %k5, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftrb $7, %k4, %k4 -; SKX-NEXT: korb %k0, %k4, %k4 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $6, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $3, %k4, %k5 -; SKX-NEXT: kshiftlb $3, %k5, %k5 -; SKX-NEXT: kshiftlb $6, %k4, %k4 -; SKX-NEXT: kshiftrb $6, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 -; SKX-NEXT: korb %k5, %k4, %k4 -; SKX-NEXT: kshiftlb $7, %k6, %k5 -; SKX-NEXT: kshiftrb $5, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $4, %k4, %k5 -; SKX-NEXT: kshiftlb $4, %k5, %k5 -; SKX-NEXT: kshiftlb $5, %k4, %k4 -; SKX-NEXT: kshiftrb $5, %k4, %k4 -; SKX-NEXT: korb %k5, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $4, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $5, %k4, %k5 -; SKX-NEXT: kshiftlb $5, %k5, %k5 -; SKX-NEXT: kshiftlb $4, %k4, %k4 -; SKX-NEXT: kshiftrb $4, %k4, %k4 -; SKX-NEXT: korb %k5, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $3, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $6, %k4, %k5 -; SKX-NEXT: kshiftlb $6, %k5, %k5 -; SKX-NEXT: kshiftlb $3, %k4, %k4 -; 
SKX-NEXT: kshiftrb $3, %k4, %k4 -; SKX-NEXT: korb %k5, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $2, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $7, %k4, %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftlb $2, %k4, %k4 -; SKX-NEXT: kshiftrb $2, %k4, %k4 -; SKX-NEXT: korb %k5, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $1, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kandb %k3, %k4, %k3 +; SKX-NEXT: korb %k3, %k0, %k0 +; SKX-NEXT: movb $-17, %dil +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; SKX-NEXT: kandb %k1, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 ; SKX-NEXT: kshiftlb $7, %k4, %k4 -; SKX-NEXT: kshiftrb $7, %k4, %k4 -; SKX-NEXT: korb %k0, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $6, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $3, %k4, %k5 -; SKX-NEXT: kshiftlb $3, %k5, %k5 -; SKX-NEXT: kshiftlb $6, %k4, %k4 -; SKX-NEXT: kshiftrb $6, %k4, %k4 -; SKX-NEXT: korb %k5, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $5, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $4, %k4, %k5 -; SKX-NEXT: kshiftlb $4, %k5, %k5 -; SKX-NEXT: kshiftlb $5, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 -; SKX-NEXT: kshiftrb $5, %k4, %k4 -; SKX-NEXT: korb %k5, %k4, %k4 -; SKX-NEXT: kshiftlb $7, %k6, %k5 -; SKX-NEXT: kshiftrb $4, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $5, %k4, %k5 -; SKX-NEXT: kshiftlb $5, %k5, %k5 -; SKX-NEXT: kshiftlb $4, %k4, %k4 -; SKX-NEXT: kshiftrb $4, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 -; SKX-NEXT: korb %k5, %k4, %k4 -; SKX-NEXT: kshiftlb $7, %k6, %k5 -; SKX-NEXT: kshiftrb $3, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $6, 
%k4, %k5 -; SKX-NEXT: kshiftlb $6, %k5, %k5 -; SKX-NEXT: kshiftlb $3, %k4, %k4 ; SKX-NEXT: kshiftrb $3, %k4, %k4 -; SKX-NEXT: korb %k5, %k4, %k4 +; SKX-NEXT: korb %k4, %k0, %k0 +; SKX-NEXT: movb $-33, %dil +; SKX-NEXT: kmovd %edi, %k4 +; SKX-NEXT: kandb %k4, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 ; SKX-NEXT: kshiftlb $7, %k5, %k5 ; SKX-NEXT: kshiftrb $2, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kshiftrb $7, %k4, %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftlb $2, %k4, %k4 -; SKX-NEXT: kshiftrb $2, %k4, %k4 -; SKX-NEXT: korb %k5, %k4, %k4 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $1, %k5, %k5 -; SKX-NEXT: korb %k4, %k5, %k4 -; SKX-NEXT: kmovd %esi, %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $7, %k5, %k5 -; SKX-NEXT: korb %k0, %k5, %k0 -; SKX-NEXT: kmovd %edx, %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $6, %k5, %k5 -; SKX-NEXT: korb %k0, %k5, %k0 -; SKX-NEXT: kshiftrb $3, %k0, %k5 -; SKX-NEXT: kshiftlb $3, %k5, %k5 -; SKX-NEXT: kshiftlb $6, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k0 ; SKX-NEXT: korb %k5, %k0, %k0 -; SKX-NEXT: kmovd %ecx, %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $5, %k5, %k5 -; SKX-NEXT: korb %k0, %k5, %k0 -; SKX-NEXT: kshiftrb $4, %k0, %k5 -; SKX-NEXT: kshiftlb $4, %k5, %k5 -; SKX-NEXT: kshiftlb $5, %k0, %k0 -; SKX-NEXT: kshiftrb $5, %k0, %k0 -; SKX-NEXT: korb %k5, %k0, %k0 -; SKX-NEXT: kmovd %r8d, %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $4, %k5, %k5 -; SKX-NEXT: korb %k0, %k5, %k0 -; SKX-NEXT: kshiftrb $5, %k0, %k5 -; SKX-NEXT: kshiftlb $5, %k5, %k5 -; SKX-NEXT: kshiftlb $4, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 +; SKX-NEXT: movb $-65, %dil +; SKX-NEXT: kmovd %edi, %k5 +; SKX-NEXT: kandb %k5, %k0, %k1 +; SKX-NEXT: kshiftlb $7, %k6, %k6 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; SKX-NEXT: kshiftrb $1, %k6, %k6 +; SKX-NEXT: korb %k6, %k1, %k1 +; SKX-NEXT: kmovw %k1, 
{{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; SKX-NEXT: kandb %k6, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $6, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload +; SKX-NEXT: kandb %k3, %k0, %k2 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $5, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; SKX-NEXT: korb %k1, %k2, %k1 +; SKX-NEXT: kandb %k7, %k1, %k1 +; SKX-NEXT: kshiftlb $7, %k0, %k0 ; SKX-NEXT: kshiftrb $4, %k0, %k0 -; SKX-NEXT: korb %k5, %k0, %k0 -; SKX-NEXT: kmovd %r9d, %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $3, %k5, %k5 -; SKX-NEXT: korb %k0, %k5, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k5 -; SKX-NEXT: kshiftlb $6, %k5, %k5 -; SKX-NEXT: kshiftlb $3, %k0, %k0 -; SKX-NEXT: kshiftrb $3, %k0, %k0 -; SKX-NEXT: korb %k5, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $2, %k5, %k5 -; SKX-NEXT: korb %k0, %k5, %k0 -; SKX-NEXT: kshiftrb $7, %k0, %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftlb $2, %k0, %k0 -; SKX-NEXT: kshiftrb $2, %k0, %k0 -; SKX-NEXT: korb %k5, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $1, %k5, %k5 -; SKX-NEXT: korb %k0, %k5, %k0 +; SKX-NEXT: korb %k0, %k1, %k0 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; SKX-NEXT: kandb %k2, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $3, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: kandb %k4, %k0, %k0 +; SKX-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $2, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kandb %k5, 
%k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $1, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; SKX-NEXT: kandb %k1, %k0, %k0 +; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; SKX-NEXT: kandb %k6, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $6, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: kandb %k3, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $5, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kandb %k7, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $4, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kmovq %k2, %k3 ; SKX-NEXT: kandb %k2, %k0, %k0 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $3, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kandb %k4, %k0, %k0 +; SKX-NEXT: kshiftlb $7, %k2, %k1 +; SKX-NEXT: kshiftrb $2, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kandb %k5, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $1, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kmovq %k6, %k0 +; SKX-NEXT: kandb %k6, %k1, %k1 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $6, %k2, %k2 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload +; SKX-NEXT: kandb %k4, %k1, %k1 +; SKX-NEXT: kshiftlb $7, %k6, %k2 +; SKX-NEXT: kshiftrb $5, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; 
SKX-NEXT: kandb %k7, %k1, %k1 +; SKX-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $4, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k3, %k1, %k1 +; SKX-NEXT: kmovq %k3, %k6 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $3, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; SKX-NEXT: kandb %k2, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $2, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k5, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $1, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kandb %k0, %k1, %k1 +; SKX-NEXT: kmovq %k0, %k3 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $6, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k4, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $5, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k7, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $4, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kandb %k6, %k1, %k1 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $3, %k2, %k2 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; SKX-NEXT: kandb %k7, %k1, %k1 +; SKX-NEXT: kshiftlb $7, %k0, %k0 +; SKX-NEXT: kshiftrb $2, %k0, %k0 +; SKX-NEXT: korb %k0, %k1, %k0 +; SKX-NEXT: kandb %k5, %k0, %k0 +; SKX-NEXT: kmovb 
{{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $1, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; SKX-NEXT: kandb %k1, %k0, %k0 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; SKX-NEXT: kandb %k1, %k0, %k0 +; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; SKX-NEXT: kandb %k3, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $6, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kandb %k4, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $5, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload +; SKX-NEXT: kandb %k4, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $4, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kandb %k6, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $3, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kandb %k7, %k0, %k0 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $2, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; SKX-NEXT: kandb %k5, %k0, %k0 +; SKX-NEXT: kshiftlb $7, %k2, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kshiftrb $1, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kandb %k3, %k2, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $6, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; SKX-NEXT: kandb %k6, %k1, %k1 +; SKX-NEXT: kshiftlb $7, 
%k2, %k2 +; SKX-NEXT: kshiftrb $5, %k2, %k2 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k4, %k1, %k1 +; SKX-NEXT: kshiftlb $7, %k6, %k2 +; SKX-NEXT: kshiftrb $4, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; SKX-NEXT: kandb %k6, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $3, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k7, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $2, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k5, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $1, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k0, %k1, %k0 +; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; SKX-NEXT: kandb %k3, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $6, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload +; SKX-NEXT: kandb %k3, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $5, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; SKX-NEXT: kandb %k5, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $4, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kandb %k6, %k0, %k0 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $3, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kandb %k7, %k0, %k0 +; SKX-NEXT: kshiftlb $7, %k2, %k1 +; SKX-NEXT: kshiftrb $2, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; 
SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload +; SKX-NEXT: kandb %k4, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $1, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovd %esi, %k1 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; SKX-NEXT: kandb %k2, %k1, %k1 +; SKX-NEXT: kmovd %edx, %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $6, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k3, %k1, %k1 +; SKX-NEXT: kmovd %ecx, %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $5, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k5, %k1, %k1 +; SKX-NEXT: kmovd %r8d, %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $4, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k6, %k1, %k1 +; SKX-NEXT: kmovd %r9d, %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $3, %k2, %k2 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k7, %k1, %k1 +; SKX-NEXT: kshiftlb $7, %k3, %k2 +; SKX-NEXT: kshiftrb $2, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k4, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $1, %k2, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k0, %k1, %k0 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; SKX-NEXT: kandb %k1, %k0, %k0 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; SKX-NEXT: kandb %k1, %k0, %k0 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; SKX-NEXT: kandb %k1, %k0, %k0 ; SKX-NEXT: kshiftrb $6, %k0, %k1 ; SKX-NEXT: kmovd %k1, %r8d @@ -2720,557 +2286,380 @@ ; KNL_X32-LABEL: test17: ; KNL_X32: ## %bb.0: ; KNL_X32-NEXT: pushl %ebx -; KNL_X32-NEXT: pushl %eax +; KNL_X32-NEXT: subl $16, %esp +; KNL_X32-NEXT: movw $-3, %ax +; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al 
; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftlw $2, %k0, %k1 -; KNL_X32-NEXT: korw %k1, %k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $14, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $3, %k0, %k2 -; KNL_X32-NEXT: kshiftlw $3, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $14, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 +; KNL_X32-NEXT: kshiftrw $14, %k1, %k1 +; KNL_X32-NEXT: korw %k1, %k0, %k0 +; KNL_X32-NEXT: movw $-5, %ax +; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $13, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $4, %k0, %k2 -; KNL_X32-NEXT: kshiftlw $4, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $13, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $13, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k3 +; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 +; KNL_X32-NEXT: kshiftrw $13, %k3, %k3 +; KNL_X32-NEXT: korw %k3, %k0, %k0 +; KNL_X32-NEXT: movw $-9, %ax +; KNL_X32-NEXT: kmovw %eax, %k3 +; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $12, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $5, %k0, %k2 -; KNL_X32-NEXT: kshiftlw $5, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $12, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $12, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k4 +; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 +; KNL_X32-NEXT: kshiftrw $12, %k4, %k4 +; 
KNL_X32-NEXT: korw %k4, %k0, %k0 +; KNL_X32-NEXT: movw $-17, %ax +; KNL_X32-NEXT: kmovw %eax, %k4 +; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $11, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $6, %k0, %k2 -; KNL_X32-NEXT: kshiftlw $6, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $11, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $11, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k5 +; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 +; KNL_X32-NEXT: kshiftrw $11, %k5, %k5 +; KNL_X32-NEXT: korw %k5, %k0, %k0 +; KNL_X32-NEXT: movw $-33, %ax +; KNL_X32-NEXT: kmovw %eax, %k5 +; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $10, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $7, %k0, %k2 -; KNL_X32-NEXT: kshiftlw $7, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $10, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $10, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 +; KNL_X32-NEXT: movw $-65, %ax +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kandw %k6, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $9, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $15, %k0, %k0 -; KNL_X32-NEXT: korw %k1, 
%k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $14, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $3, %k0, %k2 -; KNL_X32-NEXT: kshiftlw $3, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $14, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $13, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $4, %k0, %k2 -; KNL_X32-NEXT: kshiftlw $4, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $13, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $13, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $12, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $5, %k0, %k2 -; KNL_X32-NEXT: kshiftlw $5, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $12, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $12, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $11, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $6, %k0, %k2 -; 
KNL_X32-NEXT: kshiftlw $6, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $11, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $11, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $10, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $7, %k0, %k2 -; KNL_X32-NEXT: kshiftlw $7, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $10, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $10, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k6, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $9, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kmovw %k0, (%esp) ## 2-byte Spill +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $15, %k0, %k0 -; KNL_X32-NEXT: korw %k1, %k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k3 -; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 -; KNL_X32-NEXT: kshiftrw $14, %k3, %k3 -; KNL_X32-NEXT: korw %k0, %k3, %k0 -; KNL_X32-NEXT: kshiftrw $3, %k0, %k3 -; KNL_X32-NEXT: kshiftlw $3, %k3, %k3 -; KNL_X32-NEXT: kshiftlw $14, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: korw %k3, %k0, %k0 +; KNL_X32-NEXT: 
kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k3 -; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 -; KNL_X32-NEXT: kshiftrw $13, %k3, %k3 -; KNL_X32-NEXT: korw %k0, %k3, %k0 -; KNL_X32-NEXT: kshiftrw $4, %k0, %k3 -; KNL_X32-NEXT: kshiftlw $4, %k3, %k3 -; KNL_X32-NEXT: kshiftlw $13, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $13, %k0, %k0 -; KNL_X32-NEXT: korw %k3, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k3 -; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 -; KNL_X32-NEXT: kshiftrw $12, %k3, %k3 -; KNL_X32-NEXT: korw %k0, %k3, %k0 -; KNL_X32-NEXT: kshiftrw $5, %k0, %k3 -; KNL_X32-NEXT: kshiftlw $5, %k3, %k3 -; KNL_X32-NEXT: kshiftlw $12, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $12, %k0, %k0 -; KNL_X32-NEXT: korw %k3, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k3 -; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 -; KNL_X32-NEXT: kshiftrw $11, %k3, %k3 -; KNL_X32-NEXT: korw %k0, %k3, %k0 -; KNL_X32-NEXT: kshiftrw $6, %k0, %k3 -; KNL_X32-NEXT: kshiftlw $6, %k3, %k3 -; KNL_X32-NEXT: kshiftlw $11, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $11, %k0, %k0 -; KNL_X32-NEXT: korw %k3, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k3 -; KNL_X32-NEXT: kshiftlw $15, %k3, 
%k3 -; KNL_X32-NEXT: kshiftrw $10, %k3, %k3 -; KNL_X32-NEXT: korw %k0, %k3, %k0 -; KNL_X32-NEXT: kshiftrw $7, %k0, %k3 -; KNL_X32-NEXT: kshiftlw $7, %k3, %k3 -; KNL_X32-NEXT: kshiftlw $10, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $10, %k0, %k0 -; KNL_X32-NEXT: korw %k3, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k6, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k3 -; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 -; KNL_X32-NEXT: kshiftrw $9, %k3, %k3 -; KNL_X32-NEXT: korw %k0, %k3, %k3 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $15, %k0, %k0 -; KNL_X32-NEXT: korw %k1, %k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k4 -; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 -; KNL_X32-NEXT: kshiftrw $14, %k4, %k4 -; KNL_X32-NEXT: korw %k0, %k4, %k0 -; KNL_X32-NEXT: kshiftrw $3, %k0, %k4 -; KNL_X32-NEXT: kshiftlw $3, %k4, %k4 -; KNL_X32-NEXT: kshiftlw $14, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: korw %k4, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k4 -; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 -; KNL_X32-NEXT: kshiftrw $13, %k4, %k4 -; KNL_X32-NEXT: korw %k0, %k4, %k0 -; KNL_X32-NEXT: kshiftrw $4, %k0, %k4 -; KNL_X32-NEXT: kshiftlw $4, %k4, %k4 -; KNL_X32-NEXT: kshiftlw $13, %k0, %k0 -; KNL_X32-NEXT: 
kshiftrw $13, %k0, %k0 -; KNL_X32-NEXT: korw %k4, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k4 -; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 -; KNL_X32-NEXT: kshiftrw $12, %k4, %k4 -; KNL_X32-NEXT: korw %k0, %k4, %k0 -; KNL_X32-NEXT: kshiftrw $5, %k0, %k4 -; KNL_X32-NEXT: kshiftlw $5, %k4, %k4 -; KNL_X32-NEXT: kshiftlw $12, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $12, %k0, %k0 -; KNL_X32-NEXT: korw %k4, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k4 -; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 -; KNL_X32-NEXT: kshiftrw $11, %k4, %k4 -; KNL_X32-NEXT: korw %k0, %k4, %k0 -; KNL_X32-NEXT: kshiftrw $6, %k0, %k4 -; KNL_X32-NEXT: kshiftlw $6, %k4, %k4 -; KNL_X32-NEXT: kshiftlw $11, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $11, %k0, %k0 -; KNL_X32-NEXT: korw %k4, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k4 -; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 -; KNL_X32-NEXT: kshiftrw $10, %k4, %k4 -; KNL_X32-NEXT: korw %k0, %k4, %k0 -; KNL_X32-NEXT: kshiftrw $7, %k0, %k4 -; KNL_X32-NEXT: kshiftlw $7, %k4, %k4 -; KNL_X32-NEXT: kshiftlw $10, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $10, %k0, %k0 -; KNL_X32-NEXT: korw %k4, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k6, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), 
%al -; KNL_X32-NEXT: kmovw %eax, %k4 -; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 -; KNL_X32-NEXT: kshiftrw $9, %k4, %k4 -; KNL_X32-NEXT: korw %k0, %k4, %k4 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $15, %k0, %k0 -; KNL_X32-NEXT: korw %k1, %k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k5 -; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 -; KNL_X32-NEXT: kshiftrw $14, %k5, %k5 -; KNL_X32-NEXT: korw %k0, %k5, %k0 -; KNL_X32-NEXT: kshiftrw $3, %k0, %k5 -; KNL_X32-NEXT: kshiftlw $3, %k5, %k5 -; KNL_X32-NEXT: kshiftlw $14, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: korw %k5, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k5 -; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 -; KNL_X32-NEXT: kshiftrw $13, %k5, %k5 -; KNL_X32-NEXT: korw %k0, %k5, %k0 -; KNL_X32-NEXT: kshiftrw $4, %k0, %k5 -; KNL_X32-NEXT: kshiftlw $4, %k5, %k5 -; KNL_X32-NEXT: kshiftlw $13, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $13, %k0, %k0 -; KNL_X32-NEXT: korw %k5, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k5 -; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 -; KNL_X32-NEXT: kshiftrw $12, %k5, %k5 -; KNL_X32-NEXT: korw %k0, %k5, %k0 -; KNL_X32-NEXT: kshiftrw $5, %k0, %k5 -; KNL_X32-NEXT: kshiftlw 
$5, %k5, %k5 -; KNL_X32-NEXT: kshiftlw $12, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $12, %k0, %k0 -; KNL_X32-NEXT: korw %k5, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k5 -; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 -; KNL_X32-NEXT: kshiftrw $11, %k5, %k5 -; KNL_X32-NEXT: korw %k0, %k5, %k0 -; KNL_X32-NEXT: kshiftrw $6, %k0, %k5 -; KNL_X32-NEXT: kshiftlw $6, %k5, %k5 -; KNL_X32-NEXT: kshiftlw $11, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $11, %k0, %k0 -; KNL_X32-NEXT: korw %k5, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k5 -; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 -; KNL_X32-NEXT: kshiftrw $10, %k5, %k5 -; KNL_X32-NEXT: korw %k0, %k5, %k0 -; KNL_X32-NEXT: kshiftrw $7, %k0, %k5 -; KNL_X32-NEXT: kshiftlw $7, %k5, %k5 -; KNL_X32-NEXT: kshiftlw $10, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $10, %k0, %k0 -; KNL_X32-NEXT: korw %k5, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k6, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k5 -; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 -; KNL_X32-NEXT: kshiftrw $9, %k5, %k5 -; KNL_X32-NEXT: korw %k0, %k5, %k5 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: 
kshiftrw $15, %k0, %k0 -; KNL_X32-NEXT: korw %k1, %k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $14, %k6, %k6 -; KNL_X32-NEXT: korw %k0, %k6, %k0 -; KNL_X32-NEXT: kshiftrw $3, %k0, %k6 -; KNL_X32-NEXT: kshiftlw $3, %k6, %k6 -; KNL_X32-NEXT: kshiftlw $14, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: korw %k6, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 -; KNL_X32-NEXT: korw %k0, %k6, %k0 -; KNL_X32-NEXT: kshiftrw $4, %k0, %k6 -; KNL_X32-NEXT: kshiftlw $4, %k6, %k6 -; KNL_X32-NEXT: kshiftlw $13, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $13, %k0, %k0 -; KNL_X32-NEXT: korw %k6, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 -; KNL_X32-NEXT: korw %k0, %k6, %k0 -; KNL_X32-NEXT: kshiftrw $5, %k0, %k6 -; KNL_X32-NEXT: kshiftlw $5, %k6, %k6 -; KNL_X32-NEXT: kshiftlw $12, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $12, %k0, %k0 -; KNL_X32-NEXT: korw %k6, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 -; KNL_X32-NEXT: korw %k0, %k6, 
%k0 -; KNL_X32-NEXT: kshiftrw $6, %k0, %k6 -; KNL_X32-NEXT: kshiftlw $6, %k6, %k6 -; KNL_X32-NEXT: kshiftlw $11, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $11, %k0, %k0 -; KNL_X32-NEXT: korw %k6, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 -; KNL_X32-NEXT: korw %k0, %k6, %k0 -; KNL_X32-NEXT: kshiftrw $7, %k0, %k6 -; KNL_X32-NEXT: kshiftlw $7, %k6, %k6 -; KNL_X32-NEXT: kshiftlw $10, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $10, %k0, %k0 -; KNL_X32-NEXT: korw %k6, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k6, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 -; KNL_X32-NEXT: korw %k0, %k6, %k6 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $15, %k0, %k0 -; KNL_X32-NEXT: korw %k1, %k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 -; KNL_X32-NEXT: korw %k0, %k7, %k0 -; KNL_X32-NEXT: kshiftrw $3, %k0, %k7 -; KNL_X32-NEXT: kshiftlw $3, %k7, %k7 -; KNL_X32-NEXT: kshiftlw $14, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 ; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw 
%k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 -; KNL_X32-NEXT: korw %k0, %k7, %k0 -; KNL_X32-NEXT: kshiftrw $4, %k0, %k7 -; KNL_X32-NEXT: kshiftlw $4, %k7, %k7 -; KNL_X32-NEXT: kshiftlw $13, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $13, %k0, %k0 ; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 -; KNL_X32-NEXT: korw %k0, %k7, %k0 -; KNL_X32-NEXT: kshiftrw $5, %k0, %k7 -; KNL_X32-NEXT: kshiftlw $5, %k7, %k7 -; KNL_X32-NEXT: kshiftlw $12, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $12, %k0, %k0 ; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 -; KNL_X32-NEXT: korw %k0, %k7, %k0 -; KNL_X32-NEXT: kshiftrw $6, %k0, %k7 -; KNL_X32-NEXT: kshiftlw $6, %k7, %k7 -; KNL_X32-NEXT: kshiftlw $11, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $11, %k0, %k0 ; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 -; KNL_X32-NEXT: korw %k0, %k7, %k0 -; KNL_X32-NEXT: kshiftrw $7, %k0, %k7 -; KNL_X32-NEXT: kshiftlw $7, %k7, %k7 -; KNL_X32-NEXT: kshiftlw $10, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $10, %k0, %k0 ; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k6, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 -; KNL_X32-NEXT: korw %k0, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: 
movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $15, %k0, %k0 -; KNL_X32-NEXT: korw %k1, %k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $14, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $3, %k0, %k2 -; KNL_X32-NEXT: kshiftlw $3, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $14, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $13, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $4, %k0, %k2 -; KNL_X32-NEXT: kshiftlw $4, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $13, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $13, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $12, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $5, %k0, %k2 -; KNL_X32-NEXT: kshiftlw $5, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $12, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $12, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, 
%k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $11, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $6, %k0, %k2 -; KNL_X32-NEXT: kshiftlw $6, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $11, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $11, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $10, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 -; KNL_X32-NEXT: kshiftrw $7, %k0, %k2 -; KNL_X32-NEXT: kshiftlw $7, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $10, %k0, %k0 -; KNL_X32-NEXT: kshiftrw $10, %k0, %k0 -; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kandw %k6, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $9, %k2, %k2 -; KNL_X32-NEXT: korw %k0, %k2, %k0 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $15, %k2, %k2 -; KNL_X32-NEXT: korw %k1, %k2, %k1 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kandw %k2, %k7, %k2 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $14, %k2, %k2 -; KNL_X32-NEXT: korw %k1, %k2, %k1 -; KNL_X32-NEXT: kshiftrw $3, %k1, %k2 -; KNL_X32-NEXT: kshiftlw $3, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $14, %k1, %k1 -; KNL_X32-NEXT: 
kshiftrw $14, %k1, %k1 -; KNL_X32-NEXT: korw %k2, %k1, %k1 +; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 +; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 +; KNL_X32-NEXT: korw %k7, %k2, %k2 +; KNL_X32-NEXT: kandw %k1, %k2, %k1 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $13, %k2, %k2 -; KNL_X32-NEXT: korw %k1, %k2, %k1 -; KNL_X32-NEXT: kshiftrw $4, %k1, %k2 -; KNL_X32-NEXT: kshiftlw $4, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $13, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $13, %k1, %k1 ; KNL_X32-NEXT: korw %k2, %k1, %k1 +; KNL_X32-NEXT: kandw %k3, %k1, %k1 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $12, %k2, %k2 -; KNL_X32-NEXT: korw %k1, %k2, %k1 -; KNL_X32-NEXT: kshiftrw $5, %k1, %k2 -; KNL_X32-NEXT: kshiftlw $5, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $12, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $12, %k1, %k1 ; KNL_X32-NEXT: korw %k2, %k1, %k1 +; KNL_X32-NEXT: kandw %k4, %k1, %k1 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $11, %k2, %k2 -; KNL_X32-NEXT: korw %k1, %k2, %k1 -; KNL_X32-NEXT: kshiftrw $6, %k1, %k2 -; KNL_X32-NEXT: kshiftlw $6, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $11, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $11, %k1, %k1 ; KNL_X32-NEXT: korw %k2, %k1, %k1 +; KNL_X32-NEXT: kandw %k5, %k1, %k1 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $10, %k2, %k2 -; KNL_X32-NEXT: korw %k1, %k2, %k1 -; KNL_X32-NEXT: kshiftrw $7, %k1, %k2 -; KNL_X32-NEXT: kshiftlw $7, %k2, %k2 -; KNL_X32-NEXT: kshiftlw $10, %k1, %k1 -; KNL_X32-NEXT: kshiftrw $10, %k1, %k1 ; KNL_X32-NEXT: korw %k2, %k1, %k1 +; KNL_X32-NEXT: kandw %k6, %k1, %k1 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 ; 
KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $9, %k2, %k2 -; KNL_X32-NEXT: korw %k1, %k2, %k1 +; KNL_X32-NEXT: korw %k2, %k1, %k1 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: kandw %k7, %k0, %k0 -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: kmovw (%esp), %k1 ## 2-byte Reload +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload +; KNL_X32-NEXT: kandw %k1, %k0, %k0 +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload +; KNL_X32-NEXT: kandw %k1, %k0, %k0 +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload +; KNL_X32-NEXT: kandw %k1, %k0, %k0 +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload +; KNL_X32-NEXT: kandw %k1, %k0, %k0 +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload +; KNL_X32-NEXT: kandw %k1, %k0, %k0 +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload ; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload ; KNL_X32-NEXT: kandw %k1, %k0, %k0 @@ -3308,7 +2697,7 @@ ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; KNL_X32-NEXT: andb $127, %cl ; KNL_X32-NEXT: movb %cl, (%eax) -; KNL_X32-NEXT: addl $4, %esp +; KNL_X32-NEXT: addl $16, %esp ; KNL_X32-NEXT: popl %ebx ; KNL_X32-NEXT: retl $4 %j = and <7 x i1> %a, %b diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll --- a/llvm/test/CodeGen/X86/avx512-ext.ll +++ b/llvm/test/CodeGen/X86/avx512-ext.ll @@ -1886,474 +1886,432 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-LABEL: test21: ; KNL: # %bb.0: +; KNL-NEXT: movw $-3, %ax +; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kshiftlw $2, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte 
Spill +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k1, %k2 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: kmovw %esi, %k1 -; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $14, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: kshiftlw $3, %k0, %k3 +; KNL-NEXT: movw $-5, %ax +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %edx, %k1 -; KNL-NEXT: kshiftlw $2, %k1, %k1 -; KNL-NEXT: korw %k1, %k3, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $13, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 -; KNL-NEXT: kshiftlw $4, %k0, %k4 +; KNL-NEXT: movw $-9, %ax +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %ecx, %k1 -; KNL-NEXT: kshiftlw $3, %k1, %k1 -; KNL-NEXT: korw %k1, %k4, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $12, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: kshiftlw $5, %k0, %k5 +; KNL-NEXT: movw $-17, %ax +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kandw %k6, %k0, %k0 +; KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: kmovw %r8d, %k1 -; KNL-NEXT: kshiftlw $4, %k1, %k1 -; KNL-NEXT: korw %k1, %k5, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $11, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 -; KNL-NEXT: kshiftlw $6, %k0, %k6 +; KNL-NEXT: movw $-33, %ax +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k1, %k3 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; 
KNL-NEXT: kmovw %r9d, %k1 -; KNL-NEXT: kshiftlw $5, %k1, %k1 -; KNL-NEXT: korw %k1, %k6, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $10, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 -; KNL-NEXT: kshiftlw $7, %k0, %k7 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: movw $-65, %ax ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $6, %k1, %k1 -; KNL-NEXT: korw %k1, %k7, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $9, %k0, %k0 -; KNL-NEXT: kshiftrw $9, %k0, %k0 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $7, %k1, %k1 -; KNL-NEXT: kshiftlw $8, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $9, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $8, %k0, %k0 -; KNL-NEXT: kshiftrw $8, %k0, %k0 +; KNL-NEXT: movw $-129, %ax +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k1, %k4 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $8, %k1, %k1 -; KNL-NEXT: kshiftlw $9, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $8, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $7, %k0, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k0 +; KNL-NEXT: movw $-257, %ax # imm = 0xFEFF +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $9, %k1, %k1 -; KNL-NEXT: kshiftlw $10, %k0, %k2 -; KNL-NEXT: kmovw %k2, 
{{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $7, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $6, %k0, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k0 +; KNL-NEXT: movw $-513, %ax # imm = 0xFDFF +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k1, %k5 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $10, %k1, %k1 -; KNL-NEXT: kshiftlw $11, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $6, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $5, %k0, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k0 +; KNL-NEXT: movw $-1025, %ax # imm = 0xFBFF +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $11, %k1, %k1 -; KNL-NEXT: kshiftlw $12, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $5, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $4, %k0, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k0 +; KNL-NEXT: movw $-2049, %ax # imm = 0xF7FF +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $12, %k1, %k1 -; KNL-NEXT: kshiftlw $13, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $4, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $3, %k0, %k0 -; KNL-NEXT: kshiftrw $3, 
%k0, %k0 +; KNL-NEXT: movw $-4097, %ax # imm = 0xEFFF +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $13, %k1, %k1 -; KNL-NEXT: kshiftlw $14, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $3, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $2, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k1 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: korw %k0, %k2, %k0 -; KNL-NEXT: korw %k0, %k1, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k0 +; KNL-NEXT: movw $-8193, %ax # imm = 0xDFFF +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $2, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: movw $-16385, %ax # imm = 0xBFFF ; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; KNL-NEXT: kandw %k0, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 ; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw 
$14, %k0, %k0 +; KNL-NEXT: kshiftrw $1, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $2, %k1, %k1 -; KNL-NEXT: kmovw %k3, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: korw %k1, %k3, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $3, %k1, %k1 -; KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: korw %k1, %k4, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 +; KNL-NEXT: kandw %k2, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $4, %k1, %k1 -; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: korw %k1, %k5, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload +; KNL-NEXT: kandw %k0, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $5, %k1, %k1 -; KNL-NEXT: korw %k1, %k6, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $13, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload +; KNL-NEXT: kandw %k2, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $6, %k1, %k1 -; KNL-NEXT: kmovw %k7, 
{{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: korw %k1, %k7, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $9, %k0, %k0 -; KNL-NEXT: kshiftrw $9, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $12, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kandw %k6, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $7, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $8, %k0, %k0 -; KNL-NEXT: kshiftrw $8, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $11, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kandw %k3, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $8, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $7, %k0, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $10, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload +; KNL-NEXT: kandw %k6, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $9, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $6, %k0, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $9, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kandw %k4, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $10, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: korw %k1, 
%k2, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $5, %k0, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $8, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload +; KNL-NEXT: kandw %k3, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $11, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $4, %k0, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $7, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kandw %k5, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $12, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $3, %k0, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $6, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload +; KNL-NEXT: kandw %k4, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $13, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $2, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $5, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; KNL-NEXT: kandw %k5, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $14, %k1, %k1 -; KNL-NEXT: kmovw 
{{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $4, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload +; KNL-NEXT: kandw %k7, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $3, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload +; KNL-NEXT: kandw %k7, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $2, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload +; KNL-NEXT: kandw %k7, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 ; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kshiftrw $1, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $2, %k1, %k1 -; KNL-NEXT: korw %k1, %k3, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw 
%k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $3, %k1, %k1 -; KNL-NEXT: korw %k1, %k4, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload +; KNL-NEXT: kandw %k7, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $4, %k1, %k1 -; KNL-NEXT: korw %k1, %k5, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kandw %k0, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $5, %k1, %k1 -; KNL-NEXT: korw %k1, %k6, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $13, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kandw %k2, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $6, %k1, %k1 -; KNL-NEXT: korw %k1, %k7, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $9, %k0, %k0 -; KNL-NEXT: kshiftrw $9, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $12, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload +; KNL-NEXT: kandw %k0, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $11, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: korw %k1, %k2, %k1 -; 
KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $8, %k0, %k0 -; KNL-NEXT: kshiftrw $8, %k0, %k0 +; KNL-NEXT: kandw %k2, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $8, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; KNL-NEXT: korw %k1, %k3, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $7, %k0, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $10, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kandw %k6, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $9, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload -; KNL-NEXT: korw %k1, %k4, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $6, %k0, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $9, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload +; KNL-NEXT: kandw %k2, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $10, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; KNL-NEXT: korw %k1, %k5, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $5, %k0, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $8, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kandw %k3, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $11, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; KNL-NEXT: korw %k1, %k7, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $4, %k0, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $7, %k7, 
%k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload +; KNL-NEXT: kandw %k3, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $12, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; KNL-NEXT: korw %k1, %k7, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $3, %k0, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $6, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kandw %k4, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $13, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; KNL-NEXT: korw %k1, %k7, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $2, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $5, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kandw %k5, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $14, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; KNL-NEXT: korw %k1, %k7, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $4, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload +; KNL-NEXT: kandw %k2, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $3, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 
+; KNL-NEXT: kandw %k5, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; KNL-NEXT: korw %k0, %k1, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $2, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; KNL-NEXT: kandw %k5, %k1, %k1 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kshiftlw $1, %k1, %k1 +; KNL-NEXT: kshiftrw $1, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $15, %k7, %k7 -; KNL-NEXT: korw %k0, %k7, %k0 +; KNL-NEXT: korw %k7, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $2, %k7, %k7 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; KNL-NEXT: korw %k7, %k1, %k7 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; KNL-NEXT: kandw %k5, %k7, %k7 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $14, %k6, %k6 +; KNL-NEXT: korw %k6, %k7, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $3, %k7, %k7 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; KNL-NEXT: korw %k7, %k1, %k7 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $13, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 
# 2-byte Reload +; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $4, %k7, %k7 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; KNL-NEXT: korw %k7, %k1, %k7 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $12, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kandw %k0, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $5, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k7 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $11, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload +; KNL-NEXT: kandw %k0, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $6, %k7, %k7 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; KNL-NEXT: korw %k7, %k1, %k7 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $10, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload +; KNL-NEXT: kandw %k0, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $7, %k7, %k7 -; KNL-NEXT: korw %k7, %k2, %k7 -; KNL-NEXT: kshiftlw $9, %k0, %k0 -; KNL-NEXT: kshiftrw $9, %k0, %k0 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $9, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload +; KNL-NEXT: kandw %k0, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $8, %k7, %k7 -; KNL-NEXT: korw %k7, 
%k3, %k7 -; KNL-NEXT: kshiftlw $8, %k0, %k0 -; KNL-NEXT: kshiftrw $8, %k0, %k0 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $8, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload +; KNL-NEXT: kandw %k0, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k4, %k7 -; KNL-NEXT: kshiftlw $7, %k0, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k0 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $7, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kandw %k3, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k5, %k6 -; KNL-NEXT: kshiftlw $6, %k0, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k0 -; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $6, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kandw %k4, %k6, %k5 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $11, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: korw %k6, %k2, %k5 -; KNL-NEXT: kshiftlw $5, %k0, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k0 -; KNL-NEXT: korw %k5, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $5, %k6, %k6 +; KNL-NEXT: korw %k6, %k5, %k5 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload +; KNL-NEXT: kandw %k0, %k5, %k4 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k5 -; KNL-NEXT: kshiftlw $12, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: korw %k5, %k2, %k4 -; KNL-NEXT: kshiftlw $4, %k0, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k0 -; KNL-NEXT: korw %k4, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k5, %k5 +; KNL-NEXT: kshiftrw $4, %k5, %k5 +; KNL-NEXT: korw %k5, %k4, %k4 +; KNL-NEXT: kandw %k2, %k4, %k3 ; 
KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k4 -; KNL-NEXT: kshiftlw $13, %k4, %k4 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: korw %k4, %k2, %k3 -; KNL-NEXT: kshiftlw $3, %k0, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k0 -; KNL-NEXT: korw %k3, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k4, %k4 +; KNL-NEXT: kshiftrw $3, %k4, %k4 +; KNL-NEXT: korw %k4, %k3, %k3 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload +; KNL-NEXT: kandw %k0, %k3, %k2 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k3 -; KNL-NEXT: kshiftlw $14, %k3, %k3 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload +; KNL-NEXT: kshiftlw $15, %k3, %k3 +; KNL-NEXT: kshiftrw $2, %k3, %k3 ; KNL-NEXT: korw %k3, %k2, %k2 -; KNL-NEXT: kshiftlw $2, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload +; KNL-NEXT: kandw %k0, %k2, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: kmovw %eax, %k2 +; KNL-NEXT: kshiftlw $14, %k2, %k2 ; KNL-NEXT: korw %k2, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 @@ -2362,7 +2320,6 @@ ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: korw %k2, %k0, %k2 ; KNL-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z} -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z} ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; KNL-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k1} {z} @@ -2389,475 +2346,433 @@ ; ; AVX512DQNOBW-LABEL: test21: ; AVX512DQNOBW: # %bb.0: +; AVX512DQNOBW-NEXT: movw $-3, %ax +; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kmovw %edi, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $2, %k0, %k2 -; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 +; 
AVX512DQNOBW-NEXT: kmovw %k1, %k2 +; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kmovw %esi, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $3, %k0, %k3 +; AVX512DQNOBW-NEXT: movw $-5, %ax +; AVX512DQNOBW-NEXT: kmovw %eax, %k1 +; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw %edx, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $2, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k3, %k1 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQNOBW-NEXT: kshiftrw $13, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $13, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $4, %k0, %k4 +; AVX512DQNOBW-NEXT: movw $-9, %ax +; AVX512DQNOBW-NEXT: kmovw %eax, %k1 +; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw %ecx, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $3, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k4, %k1 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $12, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $12, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $5, %k0, %k5 +; AVX512DQNOBW-NEXT: movw $-17, %ax +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kmovw %r8d, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $4, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k5, %k1 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 
+; AVX512DQNOBW-NEXT: kshiftrw $11, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $11, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $11, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $6, %k0, %k6 +; AVX512DQNOBW-NEXT: movw $-33, %ax +; AVX512DQNOBW-NEXT: kmovw %eax, %k1 +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %k1, %k3 +; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kmovw %r9d, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $5, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k6, %k1 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQNOBW-NEXT: kshiftrw $10, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $10, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $10, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $7, %k0, %k7 +; AVX512DQNOBW-NEXT: movw $-65, %ax +; AVX512DQNOBW-NEXT: kmovw %eax, %k1 +; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $6, %k1, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k7, %k1 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQNOBW-NEXT: kshiftrw $9, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $9, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $9, %k0, %k0 +; AVX512DQNOBW-NEXT: movw $-129, %ax +; AVX512DQNOBW-NEXT: kmovw %eax, %k1 +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %k1, %k4 +; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $7, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $8, %k0, %k2 -; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQNOBW-NEXT: kshiftrw 
$8, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $8, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $8, %k0, %k0 +; AVX512DQNOBW-NEXT: movw $-257, %ax # imm = 0xFEFF +; AVX512DQNOBW-NEXT: kmovw %eax, %k1 +; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $8, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $9, %k0, %k2 -; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQNOBW-NEXT: kshiftrw $7, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $7, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $7, %k0, %k0 +; AVX512DQNOBW-NEXT: movw $-513, %ax # imm = 0xFDFF +; AVX512DQNOBW-NEXT: kmovw %eax, %k1 +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %k1, %k5 +; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $9, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $10, %k0, %k2 -; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQNOBW-NEXT: kshiftrw $6, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $6, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $6, %k0, %k0 +; AVX512DQNOBW-NEXT: movw $-1025, %ax # imm = 0xFBFF +; AVX512DQNOBW-NEXT: kmovw %eax, %k1 +; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $10, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $11, %k0, %k2 -; 
AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $5, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $5, %k0, %k0 +; AVX512DQNOBW-NEXT: movw $-2049, %ax # imm = 0xF7FF +; AVX512DQNOBW-NEXT: kmovw %eax, %k1 +; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $11, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $12, %k0, %k2 -; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQNOBW-NEXT: kshiftrw $4, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $4, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $4, %k0, %k0 +; AVX512DQNOBW-NEXT: movw $-4097, %ax # imm = 0xEFFF +; AVX512DQNOBW-NEXT: kmovw %eax, %k1 +; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $12, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $13, %k0, %k2 -; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQNOBW-NEXT: kshiftrw $3, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $3, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $3, %k0, %k0 +; AVX512DQNOBW-NEXT: movw $-8193, %ax # imm = 0xDFFF +; AVX512DQNOBW-NEXT: kmovw %eax, %k1 +; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb 
{{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $13, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k0, %k2 -; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQNOBW-NEXT: kshiftrw $2, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $2, %k0, %k2 -; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQNOBW-NEXT: movw $-16385, %ax # imm = 0xBFFF ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k1, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k1 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: korw %k0, %k1, %k0 -; AVX512DQNOBW-NEXT: korw %k0, %k2, %k0 +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $14, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $15, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $1, %k2, %k2 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $14, 
%k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $2, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw %k3, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: korw %k2, %k3, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $13, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $3, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: korw %k2, %k4, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $12, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $12, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $4, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: korw %k2, %k5, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $11, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $11, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; 
AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $5, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k6, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $10, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $10, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $6, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $9, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $9, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $7, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $8, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $8, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 ; 
AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $8, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $7, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $9, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $6, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $6, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $10, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $5, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $5, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $5, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; 
AVX512DQNOBW-NEXT: kshiftlw $11, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $4, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $4, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $4, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $12, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $3, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $3, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $3, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $13, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $2, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $2, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: 
kshiftlw $14, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $14, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $15, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $1, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $14, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $2, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k3, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $13, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: 
kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $3, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k4, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $12, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $12, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $4, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k5, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $11, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $11, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $5, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k6, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $10, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $10, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $6, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k2, 
%k7, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $9, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $9, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $7, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $8, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $8, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $8, %k2, %k2 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k3, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $7, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $7, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $9, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k4, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $6, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $6, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, 
%k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $10, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k5, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $5, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $5, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $5, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $11, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $4, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $4, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $4, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $12, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $3, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $3, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $3, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw 
{{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $13, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $2, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $2, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $2, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 +; AVX512DQNOBW-NEXT: kshiftlw $14, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k2 -; AVX512DQNOBW-NEXT: kshiftlw $1, %k2, %k2 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k2, %k0, %k2 -; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2 +; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 ; 
AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $2, %k7, %k7 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2 -; AVX512DQNOBW-NEXT: kshiftrw $14, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k5, %k7, %k7 +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $14, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k7, %k6 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $3, %k7, %k7 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $13, %k2, %k2 -; AVX512DQNOBW-NEXT: kshiftrw $13, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $4, %k7, %k7 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $12, %k2, %k2 -; AVX512DQNOBW-NEXT: kshiftrw $12, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 +; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 ; 
AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $5, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k6, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $11, %k2, %k2 -; AVX512DQNOBW-NEXT: kshiftrw $11, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $6, %k7, %k7 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $10, %k2, %k2 -; AVX512DQNOBW-NEXT: kshiftrw $10, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $7, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k1, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $9, %k2, %k2 -; AVX512DQNOBW-NEXT: kshiftrw $9, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $8, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k3, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $8, %k2, %k2 -; AVX512DQNOBW-NEXT: kshiftrw $8, %k2, %k2 -; AVX512DQNOBW-NEXT: korw 
%k7, %k2, %k2 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $9, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k4, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $7, %k2, %k2 -; AVX512DQNOBW-NEXT: kshiftrw $7, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 +; AVX512DQNOBW-NEXT: kandw %k3, %k6, %k6 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $10, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k5, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $6, %k2, %k2 -; AVX512DQNOBW-NEXT: kshiftrw $6, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k6, %k2, %k2 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7 +; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 +; AVX512DQNOBW-NEXT: kandw %k4, %k6, %k5 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kshiftlw $11, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k5, %k5 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k6, %k1, %k5 -; AVX512DQNOBW-NEXT: kshiftlw $5, %k2, %k2 -; AVX512DQNOBW-NEXT: kshiftrw $5, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k5, %k2, %k2 +; AVX512DQNOBW-NEXT: kandw %k1, %k5, %k4 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k5 -; AVX512DQNOBW-NEXT: kshiftlw $12, %k5, %k5 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k5, %k1, %k4 -; 
AVX512DQNOBW-NEXT: kshiftlw $4, %k2, %k2 -; AVX512DQNOBW-NEXT: kshiftrw $4, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k4, %k2, %k2 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k5, %k5 +; AVX512DQNOBW-NEXT: kshiftrw $4, %k5, %k5 +; AVX512DQNOBW-NEXT: korw %k5, %k4, %k4 +; AVX512DQNOBW-NEXT: kandw %k2, %k4, %k3 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k4 -; AVX512DQNOBW-NEXT: kshiftlw $13, %k4, %k4 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4 +; AVX512DQNOBW-NEXT: kshiftrw $3, %k4, %k4 +; AVX512DQNOBW-NEXT: korw %k4, %k3, %k3 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k4, %k1, %k3 -; AVX512DQNOBW-NEXT: kshiftlw $3, %k2, %k2 -; AVX512DQNOBW-NEXT: kshiftrw $3, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k3, %k2, %k2 +; AVX512DQNOBW-NEXT: kandw %k1, %k3, %k2 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k3 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k3, %k3 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3 +; AVX512DQNOBW-NEXT: kshiftrw $2, %k3, %k3 +; AVX512DQNOBW-NEXT: korw %k3, %k2, %k2 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: korw %k3, %k1, %k1 -; AVX512DQNOBW-NEXT: kshiftlw $2, %k2, %k2 -; AVX512DQNOBW-NEXT: kshiftrw $2, %k2, %k2 -; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 +; AVX512DQNOBW-NEXT: kandw %k1, %k2, %k1 +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQNOBW-NEXT: kmovw %eax, %k2 +; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2 +; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al @@ -2865,7 +2780,6 @@ ; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1 ; AVX512DQNOBW-NEXT: vpmovm2d %k1, %zmm4 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm5 ; AVX512DQNOBW-NEXT: kmovw 
{{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm6 diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll --- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -302,14 +302,12 @@ ; KNL: ## %bb.0: ; KNL-NEXT: movb (%rdi), %al ; KNL-NEXT: kmovw %esi, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k1 -; KNL-NEXT: kshiftlw $11, %k1, %k1 -; KNL-NEXT: kshiftlw $6, %k0, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k0 -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $5, %k2, %k2 -; KNL-NEXT: korw %k2, %k1, %k1 +; KNL-NEXT: movw $-1025, %cx ## imm = 0xFBFF +; KNL-NEXT: kmovw %ecx, %k1 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $5, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax @@ -319,13 +317,11 @@ ; SKX: ## %bb.0: ; SKX-NEXT: kmovb (%rdi), %k0 ; SKX-NEXT: kmovd %esi, %k1 -; SKX-NEXT: kshiftrw $11, %k1, %k2 -; SKX-NEXT: kshiftlw $11, %k2, %k2 -; SKX-NEXT: kshiftlw $6, %k1, %k1 -; SKX-NEXT: kshiftrw $6, %k1, %k1 +; SKX-NEXT: movw $-1025, %ax ## imm = 0xFBFF +; SKX-NEXT: kmovd %eax, %k2 +; SKX-NEXT: kandw %k2, %k1, %k1 ; SKX-NEXT: kshiftlw $15, %k0, %k0 ; SKX-NEXT: kshiftrw $5, %k0, %k0 -; SKX-NEXT: korw %k0, %k2, %k0 ; SKX-NEXT: korw %k0, %k1, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax @@ -342,15 +338,13 @@ ; KNL: ## %bb.0: ; KNL-NEXT: movb (%rdi), %al ; KNL-NEXT: kmovw %esi, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k1 -; KNL-NEXT: kshiftlw $5, %k1, %k1 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: korw %k1, %k0, %k0 +; KNL-NEXT: movw $-17, %cx +; KNL-NEXT: kmovw %ecx, %k1 +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $11, 
%k1, %k1 -; KNL-NEXT: korw %k0, %k1, %k0 +; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def $al killed $al killed $eax ; KNL-NEXT: retq @@ -359,13 +353,11 @@ ; SKX: ## %bb.0: ; SKX-NEXT: kmovb (%rdi), %k0 ; SKX-NEXT: kmovd %esi, %k1 -; SKX-NEXT: kshiftrb $5, %k1, %k2 -; SKX-NEXT: kshiftlb $5, %k2, %k2 -; SKX-NEXT: kshiftlb $4, %k1, %k1 -; SKX-NEXT: kshiftrb $4, %k1, %k1 +; SKX-NEXT: movb $-17, %al +; SKX-NEXT: kmovd %eax, %k2 +; SKX-NEXT: kandb %k2, %k1, %k1 ; SKX-NEXT: kshiftlb $7, %k0, %k0 ; SKX-NEXT: kshiftrb $3, %k0, %k0 -; SKX-NEXT: korb %k0, %k2, %k0 ; SKX-NEXT: korb %k0, %k1, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def $al killed $al killed $eax @@ -801,15 +793,12 @@ ; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0 ; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: shll $16, %ecx -; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k1 -; KNL-NEXT: kshiftlw $5, %k1, %k1 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $11, %k2, %k2 -; KNL-NEXT: korw %k2, %k1, %k1 +; KNL-NEXT: movw $-17, %dx +; KNL-NEXT: kmovw %edx, %k1 +; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0 {%k1} +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $11, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: orl %ecx, %eax @@ -823,14 +812,12 @@ ; SKX-NEXT: vpcmpltud %zmm2, %zmm0, %k0 ; SKX-NEXT: vpcmpltud %zmm3, %zmm1, %k1 ; SKX-NEXT: kunpckwd %k0, %k1, %k0 -; SKX-NEXT: kshiftrd $5, %k0, %k1 -; SKX-NEXT: kshiftld $5, %k1, %k1 -; SKX-NEXT: kshiftld $28, %k0, %k0 -; SKX-NEXT: kshiftrd $28, %k0, %k0 -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kshiftld $31, %k2, %k2 -; SKX-NEXT: kshiftrd $27, %k2, %k2 -; SKX-NEXT: kord %k2, %k1, %k1 +; SKX-NEXT: movl $-17, %ecx +; SKX-NEXT: kmovd %ecx, %k1 +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovd %eax, %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 
+; SKX-NEXT: kshiftrd $27, %k1, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: vzeroupper @@ -849,16 +836,13 @@ ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 ; KNL-NEXT: cmpl %esi, %edi ; KNL-NEXT: setb %al -; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k1 -; KNL-NEXT: kshiftlw $3, %k1, %k1 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: korw %k1, %k0, %k0 +; KNL-NEXT: movw $-5, %cx +; KNL-NEXT: kmovw %ecx, %k1 +; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $13, %k1, %k1 -; KNL-NEXT: korw %k0, %k1, %k0 +; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def $al killed $al killed $eax ; KNL-NEXT: vzeroupper @@ -868,16 +852,13 @@ ; SKX: ## %bb.0: ; SKX-NEXT: cmpl %esi, %edi ; SKX-NEXT: setb %al -; SKX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 -; SKX-NEXT: kshiftrb $3, %k0, %k1 -; SKX-NEXT: kshiftlb $3, %k1, %k1 -; SKX-NEXT: kshiftlb $6, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k0 -; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: movb $-5, %cl +; SKX-NEXT: kmovd %ecx, %k1 +; SKX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} ; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $5, %k1, %k1 -; SKX-NEXT: korw %k0, %k1, %k0 +; SKX-NEXT: korw %k1, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def $al killed $al killed $eax ; SKX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -1069,15 +1069,13 @@ ; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kmovw %edi, %k3 -; KNL-NEXT: kshiftrw $6, %k0, %k4 -; KNL-NEXT: kshiftlw $6, %k4, %k4 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 +; KNL-NEXT: movw $-33, %ax +; KNL-NEXT: kmovw %eax, %k4 +; KNL-NEXT: kandw 
%k4, %k0, %k0 ; KNL-NEXT: movb $1, %al -; KNL-NEXT: kmovw %eax, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $10, %k5, %k5 -; KNL-NEXT: korw %k5, %k4, %k4 +; KNL-NEXT: kmovw %eax, %k4 +; KNL-NEXT: kshiftlw $15, %k4, %k4 +; KNL-NEXT: kshiftrw $10, %k4, %k4 ; KNL-NEXT: korw %k4, %k0, %k4 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 @@ -1095,15 +1093,13 @@ ; SKX-LABEL: test16: ; SKX: ## %bb.0: ; SKX-NEXT: kmovq %rdi, %k0 -; SKX-NEXT: kshiftrq $6, %k0, %k1 -; SKX-NEXT: kshiftlq $6, %k1, %k1 -; SKX-NEXT: kshiftlq $59, %k0, %k0 -; SKX-NEXT: kshiftrq $59, %k0, %k0 +; SKX-NEXT: movq $-33, %rax +; SKX-NEXT: kmovq %rax, %k1 +; SKX-NEXT: kandq %k1, %k0, %k0 ; SKX-NEXT: movb $1, %al -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kshiftlq $63, %k2, %k2 -; SKX-NEXT: kshiftrq $58, %k2, %k2 -; SKX-NEXT: korq %k2, %k1, %k1 +; SKX-NEXT: kmovd %eax, %k1 +; SKX-NEXT: kshiftlq $63, %k1, %k1 +; SKX-NEXT: kshiftrq $58, %k1, %k1 ; SKX-NEXT: korq %k1, %k0, %k0 ; SKX-NEXT: vpmovm2b %k0, %zmm0 ; SKX-NEXT: retq @@ -1111,15 +1107,13 @@ ; AVX512BW-LABEL: test16: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovq %rdi, %k0 -; AVX512BW-NEXT: kshiftrq $6, %k0, %k1 -; AVX512BW-NEXT: kshiftlq $6, %k1, %k1 -; AVX512BW-NEXT: kshiftlq $59, %k0, %k0 -; AVX512BW-NEXT: kshiftrq $59, %k0, %k0 +; AVX512BW-NEXT: movq $-33, %rax +; AVX512BW-NEXT: kmovq %rax, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 ; AVX512BW-NEXT: movb $1, %al -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kshiftlq $63, %k2, %k2 -; AVX512BW-NEXT: kshiftrq $58, %k2, %k2 -; AVX512BW-NEXT: korq %k2, %k1, %k1 +; AVX512BW-NEXT: kmovd %eax, %k1 +; AVX512BW-NEXT: kshiftlq $63, %k1, %k1 +; AVX512BW-NEXT: kshiftrq $58, %k1, %k1 ; AVX512BW-NEXT: korq %k1, %k0, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: retq @@ -1135,15 +1129,13 @@ ; AVX512DQ-NEXT: kmovw %ecx, %k0 ; AVX512DQ-NEXT: kmovw %eax, %k2 ; AVX512DQ-NEXT: kmovw %edi, %k3 -; AVX512DQ-NEXT: kshiftrw $6, %k1, %k4 -; 
AVX512DQ-NEXT: kshiftlw $6, %k4, %k4 -; AVX512DQ-NEXT: kshiftlw $11, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $11, %k1, %k1 +; AVX512DQ-NEXT: movw $-33, %ax +; AVX512DQ-NEXT: kmovw %eax, %k4 +; AVX512DQ-NEXT: kandw %k4, %k1, %k1 ; AVX512DQ-NEXT: movb $1, %al -; AVX512DQ-NEXT: kmovw %eax, %k5 -; AVX512DQ-NEXT: kshiftlw $15, %k5, %k5 -; AVX512DQ-NEXT: kshiftrw $10, %k5, %k5 -; AVX512DQ-NEXT: korw %k5, %k4, %k4 +; AVX512DQ-NEXT: kmovw %eax, %k4 +; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4 +; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4 ; AVX512DQ-NEXT: korw %k4, %k1, %k1 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 @@ -1194,14 +1186,12 @@ ; KNL-NEXT: kmovw %edi, %k3 ; KNL-NEXT: cmpl %edx, %esi ; KNL-NEXT: setg %al -; KNL-NEXT: kshiftrw $6, %k0, %k4 -; KNL-NEXT: kshiftlw $6, %k4, %k4 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 -; KNL-NEXT: kmovw %eax, %k5 -; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $10, %k5, %k5 -; KNL-NEXT: korw %k5, %k4, %k4 +; KNL-NEXT: movw $-33, %cx +; KNL-NEXT: kmovw %ecx, %k4 +; KNL-NEXT: kandw %k4, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k4 +; KNL-NEXT: kshiftlw $15, %k4, %k4 +; KNL-NEXT: kshiftrw $10, %k4, %k4 ; KNL-NEXT: korw %k4, %k0, %k4 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 @@ -1221,14 +1211,12 @@ ; SKX-NEXT: kmovq %rdi, %k0 ; SKX-NEXT: cmpl %edx, %esi ; SKX-NEXT: setg %al -; SKX-NEXT: kshiftrq $6, %k0, %k1 -; SKX-NEXT: kshiftlq $6, %k1, %k1 -; SKX-NEXT: kshiftlq $59, %k0, %k0 -; SKX-NEXT: kshiftrq $59, %k0, %k0 -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kshiftlq $63, %k2, %k2 -; SKX-NEXT: kshiftrq $58, %k2, %k2 -; SKX-NEXT: korq %k2, %k1, %k1 +; SKX-NEXT: movq $-33, %rcx +; SKX-NEXT: kmovq %rcx, %k1 +; SKX-NEXT: kandq %k1, %k0, %k0 +; SKX-NEXT: kmovd %eax, %k1 +; SKX-NEXT: kshiftlq $63, %k1, %k1 +; SKX-NEXT: kshiftrq $58, %k1, %k1 ; SKX-NEXT: korq %k1, %k0, %k0 ; SKX-NEXT: vpmovm2b %k0, %zmm0 ; SKX-NEXT: retq @@ -1238,14 +1226,12 @@ ; 
AVX512BW-NEXT: kmovq %rdi, %k0 ; AVX512BW-NEXT: cmpl %edx, %esi ; AVX512BW-NEXT: setg %al -; AVX512BW-NEXT: kshiftrq $6, %k0, %k1 -; AVX512BW-NEXT: kshiftlq $6, %k1, %k1 -; AVX512BW-NEXT: kshiftlq $59, %k0, %k0 -; AVX512BW-NEXT: kshiftrq $59, %k0, %k0 -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kshiftlq $63, %k2, %k2 -; AVX512BW-NEXT: kshiftrq $58, %k2, %k2 -; AVX512BW-NEXT: korq %k2, %k1, %k1 +; AVX512BW-NEXT: movq $-33, %rcx +; AVX512BW-NEXT: kmovq %rcx, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 +; AVX512BW-NEXT: kmovd %eax, %k1 +; AVX512BW-NEXT: kshiftlq $63, %k1, %k1 +; AVX512BW-NEXT: kshiftrq $58, %k1, %k1 ; AVX512BW-NEXT: korq %k1, %k0, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: retq @@ -1263,14 +1249,12 @@ ; AVX512DQ-NEXT: kmovw %edi, %k3 ; AVX512DQ-NEXT: cmpl %edx, %esi ; AVX512DQ-NEXT: setg %al -; AVX512DQ-NEXT: kshiftrw $6, %k1, %k4 -; AVX512DQ-NEXT: kshiftlw $6, %k4, %k4 -; AVX512DQ-NEXT: kshiftlw $11, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $11, %k1, %k1 -; AVX512DQ-NEXT: kmovw %eax, %k5 -; AVX512DQ-NEXT: kshiftlw $15, %k5, %k5 -; AVX512DQ-NEXT: kshiftrw $10, %k5, %k5 -; AVX512DQ-NEXT: korw %k5, %k4, %k4 +; AVX512DQ-NEXT: movw $-33, %cx +; AVX512DQ-NEXT: kmovw %ecx, %k4 +; AVX512DQ-NEXT: kandw %k4, %k1, %k1 +; AVX512DQ-NEXT: kmovw %eax, %k4 +; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4 +; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4 ; AVX512DQ-NEXT: korw %k4, %k1, %k1 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 @@ -1316,11 +1300,10 @@ ; KNL-NEXT: kmovw %esi, %k1 ; KNL-NEXT: kshiftrw $8, %k1, %k2 ; KNL-NEXT: kshiftrw $9, %k1, %k1 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 -; KNL-NEXT: kshiftlw $7, %k0, %k3 +; KNL-NEXT: movw $-65, %ax +; KNL-NEXT: kmovw %eax, %k3 +; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: kshiftlw $6, %k1, %k1 -; KNL-NEXT: korw %k1, %k3, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: kshiftlw $9, %k0, %k0 ; KNL-NEXT: kshiftrw $9, %k0, %k0 @@ -1338,11 +1321,10 @@ ; 
SKX-NEXT: kmovd %esi, %k1 ; SKX-NEXT: kshiftrw $8, %k1, %k2 ; SKX-NEXT: kshiftrw $9, %k1, %k1 -; SKX-NEXT: kshiftlb $2, %k0, %k0 -; SKX-NEXT: kshiftrb $2, %k0, %k0 -; SKX-NEXT: kshiftlb $7, %k0, %k3 +; SKX-NEXT: movb $-65, %al +; SKX-NEXT: kmovd %eax, %k3 +; SKX-NEXT: kandb %k3, %k0, %k0 ; SKX-NEXT: kshiftlb $6, %k1, %k1 -; SKX-NEXT: korb %k1, %k3, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: kshiftlb $1, %k0, %k0 ; SKX-NEXT: kshiftrb $1, %k0, %k0 @@ -1357,11 +1339,10 @@ ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: kshiftrw $8, %k1, %k2 ; AVX512BW-NEXT: kshiftrw $9, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $10, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $10, %k0, %k0 -; AVX512BW-NEXT: kshiftlw $7, %k0, %k3 +; AVX512BW-NEXT: movw $-65, %ax +; AVX512BW-NEXT: kmovd %eax, %k3 +; AVX512BW-NEXT: kandw %k3, %k0, %k0 ; AVX512BW-NEXT: kshiftlw $6, %k1, %k1 -; AVX512BW-NEXT: korw %k1, %k3, %k1 ; AVX512BW-NEXT: korw %k1, %k0, %k0 ; AVX512BW-NEXT: kshiftlw $9, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $9, %k0, %k0 @@ -1378,11 +1359,10 @@ ; AVX512DQ-NEXT: kmovw %esi, %k1 ; AVX512DQ-NEXT: kshiftrw $8, %k1, %k2 ; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $2, %k0, %k0 -; AVX512DQ-NEXT: kshiftrb $2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlb $7, %k0, %k3 +; AVX512DQ-NEXT: movb $-65, %al +; AVX512DQ-NEXT: kmovw %eax, %k3 +; AVX512DQ-NEXT: kandb %k3, %k0, %k0 ; AVX512DQ-NEXT: kshiftlb $6, %k1, %k1 -; AVX512DQ-NEXT: korb %k1, %k3, %k1 ; AVX512DQ-NEXT: korb %k1, %k0, %k0 ; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0 @@ -1400,11 +1380,10 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ; X86-NEXT: kshiftrw $8, %k1, %k2 ; X86-NEXT: kshiftrw $9, %k1, %k1 -; X86-NEXT: kshiftlb $7, %k0, %k3 -; X86-NEXT: kshiftlb $2, %k0, %k0 -; X86-NEXT: kshiftrb $2, %k0, %k0 +; X86-NEXT: movb $-65, %al +; X86-NEXT: kmovd %eax, %k3 +; X86-NEXT: kandb %k3, %k0, %k0 ; X86-NEXT: kshiftlb $6, %k1, %k1 -; X86-NEXT: korb %k1, %k3, %k1 ; X86-NEXT: korb %k1, %k0, %k0 ; X86-NEXT: 
kshiftlb $1, %k0, %k0 ; X86-NEXT: kshiftrb $1, %k0, %k0 @@ -2834,476 +2813,434 @@ ; ; KNL-LABEL: store_64i1: ; KNL: ## %bb.0: +; KNL-NEXT: movw $-3, %ax +; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %esi, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kshiftlw $2, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k1, %k2 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kmovw %edx, %k1 -; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $14, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: kshiftlw $3, %k0, %k3 +; KNL-NEXT: movw $-5, %ax +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %ecx, %k1 -; KNL-NEXT: kshiftlw $2, %k1, %k1 -; KNL-NEXT: korw %k1, %k3, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $13, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 -; KNL-NEXT: kshiftlw $4, %k0, %k4 +; KNL-NEXT: movw $-9, %ax +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %r8d, %k1 -; KNL-NEXT: kshiftlw $3, %k1, %k1 -; KNL-NEXT: korw %k1, %k4, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $12, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: kshiftlw $5, %k0, %k5 +; KNL-NEXT: movw $-17, %ax +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kandw %k6, %k0, %k0 +; KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kmovw %r9d, %k1 -; KNL-NEXT: kshiftlw $4, %k1, %k1 -; KNL-NEXT: korw %k1, %k5, %k1 +; KNL-NEXT: 
kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $11, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 -; KNL-NEXT: kshiftlw $6, %k0, %k6 +; KNL-NEXT: movw $-33, %ax +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k1, %k3 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $5, %k1, %k1 -; KNL-NEXT: korw %k1, %k6, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $10, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 -; KNL-NEXT: kshiftlw $7, %k0, %k7 +; KNL-NEXT: movw $-65, %ax +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $6, %k1, %k1 -; KNL-NEXT: korw %k1, %k7, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $9, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $9, %k0, %k0 -; KNL-NEXT: kshiftrw $9, %k0, %k0 +; KNL-NEXT: movw $-129, %ax +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k1, %k4 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $7, %k1, %k1 -; KNL-NEXT: kshiftlw $8, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $8, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $8, %k0, %k0 -; KNL-NEXT: kshiftrw $8, %k0, %k0 +; KNL-NEXT: movw $-257, %ax ## imm = 0xFEFF +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: 
kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $8, %k1, %k1 -; KNL-NEXT: kshiftlw $9, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $7, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $7, %k0, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k0 +; KNL-NEXT: movw $-513, %ax ## imm = 0xFDFF +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k1, %k5 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $9, %k1, %k1 -; KNL-NEXT: kshiftlw $10, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $6, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $6, %k0, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k0 +; KNL-NEXT: movw $-1025, %ax ## imm = 0xFBFF +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $10, %k1, %k1 -; KNL-NEXT: kshiftlw $11, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $5, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $5, %k0, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k0 +; KNL-NEXT: movw $-2049, %ax ## imm = 0xF7FF +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $11, %k1, %k1 -; KNL-NEXT: kshiftlw $12, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; 
KNL-NEXT: kshiftrw $4, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $4, %k0, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k0 +; KNL-NEXT: movw $-4097, %ax ## imm = 0xEFFF +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $12, %k1, %k1 -; KNL-NEXT: kshiftlw $13, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $3, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $3, %k0, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k0 +; KNL-NEXT: movw $-8193, %ax ## imm = 0xDFFF +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $13, %k1, %k1 -; KNL-NEXT: kshiftlw $14, %k0, %k2 -; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $2, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kshiftlw $2, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k2 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: movw $-16385, %ax ## imm = 0xBFFF ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $14, %k1, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k0, %k1, %k0 -; KNL-NEXT: korw %k0, %k2, %k0 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 +; KNL-NEXT: 
kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $1, %k2, %k2 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k1, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $2, %k2, %k2 -; KNL-NEXT: kmovw %k3, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k2, %k3, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $13, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $3, %k2, %k2 -; KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k2, %k4, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $12, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k6, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $4, %k2, %k2 -; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k2, %k5, %k2 -; 
KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $11, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $5, %k2, %k2 -; KNL-NEXT: korw %k2, %k6, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $10, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; KNL-NEXT: kandw %k6, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $6, %k2, %k2 -; KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: korw %k2, %k7, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $9, %k0, %k0 -; KNL-NEXT: kshiftrw $9, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $9, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $7, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k1, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $8, %k0, %k0 -; KNL-NEXT: kshiftrw $8, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $8, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload +; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $8, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k1, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $7, 
%k0, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $7, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $9, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k1, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $6, %k0, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $6, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload +; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $10, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k1, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $5, %k0, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $5, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $11, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k1, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $4, %k0, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $4, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; KNL-NEXT: kandw %k7, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $12, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: 
korw %k2, %k1, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $3, %k0, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $3, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; KNL-NEXT: kandw %k7, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $13, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k1, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $2, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $2, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; KNL-NEXT: kandw %k7, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $14, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k1, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; KNL-NEXT: kandw %k7, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $1, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; 
KNL-NEXT: korw %k2, %k1, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $2, %k2, %k2 -; KNL-NEXT: korw %k2, %k3, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $13, %k0, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $13, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $3, %k2, %k2 -; KNL-NEXT: korw %k2, %k4, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $12, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $4, %k2, %k2 -; KNL-NEXT: korw %k2, %k5, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $11, %k0, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $11, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $5, %k2, %k2 -; KNL-NEXT: korw %k2, %k6, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $10, %k0, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $10, %k7, %k7 +; KNL-NEXT: 
korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k6, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $6, %k2, %k2 -; KNL-NEXT: korw %k2, %k7, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $9, %k0, %k0 -; KNL-NEXT: kshiftrw $9, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $9, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $7, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k1, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $8, %k0, %k0 -; KNL-NEXT: kshiftrw $8, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $8, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $8, %k2, %k2 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $7, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k3, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $7, %k0, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k0 +; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $9, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k4, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $6, %k0, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $6, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; 
KNL-NEXT: kshiftlw $10, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k5, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $5, %k0, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $5, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $11, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k7, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $4, %k0, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $4, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $12, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k7, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $3, %k0, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $3, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $13, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k7, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kshiftlw $2, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $2, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; KNL-NEXT: kandw %k5, %k0, 
%k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $14, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k7, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k7 +; KNL-NEXT: kshiftlw $14, %k7, %k7 +; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $1, %k2, %k2 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: korw %k2, %k0, %k2 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $15, %k7, %k7 -; KNL-NEXT: korw %k2, %k7, %k2 +; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $2, %k7, %k7 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: korw %k7, %k0, %k7 -; KNL-NEXT: kshiftlw $14, %k2, %k2 -; KNL-NEXT: kshiftrw $14, %k2, %k2 -; KNL-NEXT: korw %k7, %k2, %k2 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; KNL-NEXT: kandw %k5, %k7, %k7 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $14, %k6, %k6 +; KNL-NEXT: korw %k6, %k7, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $3, %k7, %k7 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: korw %k7, %k0, %k7 -; KNL-NEXT: kshiftlw $13, %k2, %k2 -; KNL-NEXT: kshiftrw $13, %k2, %k2 -; KNL-NEXT: korw %k7, %k2, %k2 +; 
KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $13, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $4, %k7, %k7 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: korw %k7, %k0, %k7 -; KNL-NEXT: kshiftlw $12, %k2, %k2 -; KNL-NEXT: kshiftrw $12, %k2, %k2 -; KNL-NEXT: korw %k7, %k2, %k2 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $12, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kandw %k1, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $5, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k7 -; KNL-NEXT: kshiftlw $11, %k2, %k2 -; KNL-NEXT: kshiftrw $11, %k2, %k2 -; KNL-NEXT: korw %k7, %k2, %k2 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $11, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; KNL-NEXT: kandw %k1, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $6, %k7, %k7 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; KNL-NEXT: korw %k7, %k0, %k7 -; KNL-NEXT: kshiftlw $10, %k2, %k2 -; KNL-NEXT: kshiftrw $10, %k2, %k2 -; KNL-NEXT: korw %k7, %k2, %k2 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $10, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; KNL-NEXT: kandw %k1, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $7, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k7 -; KNL-NEXT: kshiftlw $9, %k2, %k2 -; KNL-NEXT: kshiftrw $9, %k2, %k2 -; KNL-NEXT: korw %k7, %k2, %k2 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $9, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; 
KNL-NEXT: kandw %k1, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $8, %k7, %k7 -; KNL-NEXT: korw %k7, %k3, %k7 -; KNL-NEXT: kshiftlw $8, %k2, %k2 -; KNL-NEXT: kshiftrw $8, %k2, %k2 -; KNL-NEXT: korw %k7, %k2, %k2 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $8, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; KNL-NEXT: kandw %k1, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k4, %k7 -; KNL-NEXT: kshiftlw $7, %k2, %k2 -; KNL-NEXT: kshiftrw $7, %k2, %k2 -; KNL-NEXT: korw %k7, %k2, %k2 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $7, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kandw %k3, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k5, %k6 -; KNL-NEXT: kshiftlw $6, %k2, %k2 -; KNL-NEXT: kshiftrw $6, %k2, %k2 -; KNL-NEXT: korw %k6, %k2, %k2 +; KNL-NEXT: kshiftlw $15, %k7, %k7 +; KNL-NEXT: kshiftrw $6, %k7, %k7 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kandw %k4, %k6, %k5 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kshiftlw $11, %k6, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $5, %k6, %k6 +; KNL-NEXT: korw %k6, %k5, %k5 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: korw %k6, %k1, %k5 -; KNL-NEXT: kshiftlw $5, %k2, %k2 -; KNL-NEXT: kshiftrw $5, %k2, %k2 -; KNL-NEXT: korw %k5, %k2, %k2 +; KNL-NEXT: kandw %k1, %k5, %k4 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k5 -; KNL-NEXT: kshiftlw $12, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: korw %k5, %k1, %k4 -; KNL-NEXT: kshiftlw $4, %k2, %k2 -; KNL-NEXT: kshiftrw $4, %k2, %k2 -; KNL-NEXT: korw %k4, %k2, %k2 +; KNL-NEXT: kshiftlw $15, %k5, %k5 +; 
KNL-NEXT: kshiftrw $4, %k5, %k5 +; KNL-NEXT: korw %k5, %k4, %k4 +; KNL-NEXT: kandw %k2, %k4, %k3 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k4 -; KNL-NEXT: kshiftlw $13, %k4, %k4 +; KNL-NEXT: kshiftlw $15, %k4, %k4 +; KNL-NEXT: kshiftrw $3, %k4, %k4 +; KNL-NEXT: korw %k4, %k3, %k3 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: korw %k4, %k1, %k3 -; KNL-NEXT: kshiftlw $3, %k2, %k2 -; KNL-NEXT: kshiftrw $3, %k2, %k2 -; KNL-NEXT: korw %k3, %k2, %k2 +; KNL-NEXT: kandw %k1, %k3, %k2 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k3 -; KNL-NEXT: kshiftlw $14, %k3, %k3 +; KNL-NEXT: kshiftlw $15, %k3, %k3 +; KNL-NEXT: kshiftrw $2, %k3, %k3 +; KNL-NEXT: korw %k3, %k2, %k2 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: korw %k3, %k1, %k1 -; KNL-NEXT: kshiftlw $2, %k2, %k2 -; KNL-NEXT: kshiftrw $2, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kandw %k1, %k2, %k1 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: kmovw %eax, %k2 +; KNL-NEXT: kshiftlw $14, %k2, %k2 +; KNL-NEXT: korw %k2, %k1, %k1 ; KNL-NEXT: kshiftlw $1, %k1, %k1 ; KNL-NEXT: kshiftrw $1, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al @@ -3311,7 +3248,6 @@ ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: korw %k2, %k1, %k1 ; KNL-NEXT: kmovw %k1, 6(%rdi) -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload ; KNL-NEXT: kmovw %k0, 4(%rdi) ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload ; KNL-NEXT: kmovw %k0, 2(%rdi) @@ -3337,476 +3273,434 @@ ; ; AVX512DQ-LABEL: store_64i1: ; AVX512DQ: ## %bb.0: +; AVX512DQ-NEXT: movw $-3, %ax +; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %esi, %k0 -; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $2, %k0, %k2 -; AVX512DQ-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 +; AVX512DQ-NEXT: kmovw %k1, %k2 +; AVX512DQ-NEXT: kmovw %k1, 
{{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kmovw %edx, %k1 -; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k2, %k1 +; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQ-NEXT: kshiftrw $14, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $14, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $3, %k0, %k3 +; AVX512DQ-NEXT: movw $-5, %ax +; AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %ecx, %k1 -; AVX512DQ-NEXT: kshiftlw $2, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k3, %k1 +; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQ-NEXT: kshiftrw $13, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $13, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $13, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $4, %k0, %k4 +; AVX512DQ-NEXT: movw $-9, %ax +; AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %r8d, %k1 -; AVX512DQ-NEXT: kshiftlw $3, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k4, %k1 +; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQ-NEXT: kshiftrw $12, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $12, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $12, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $5, %k0, %k5 +; AVX512DQ-NEXT: movw $-17, %ax +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kandw %k6, %k0, %k0 +; AVX512DQ-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kmovw %r9d, %k1 -; AVX512DQ-NEXT: kshiftlw $4, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k5, %k1 +; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQ-NEXT: kshiftrw $11, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $11, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $11, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $6, %k0, %k6 +; AVX512DQ-NEXT: movw $-33, %ax +; 
AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 +; AVX512DQ-NEXT: kmovw %k1, %k3 +; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $5, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k6, %k1 +; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQ-NEXT: kshiftrw $10, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $10, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $10, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $7, %k0, %k7 +; AVX512DQ-NEXT: movw $-65, %ax +; AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $6, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k7, %k1 +; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $9, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $9, %k0, %k0 +; AVX512DQ-NEXT: movw $-129, %ax +; AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 +; AVX512DQ-NEXT: kmovw %k1, %k4 +; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $8, %k0, %k2 -; AVX512DQ-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: korw %k1, %k2, %k1 +; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQ-NEXT: kshiftrw $8, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $8, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $8, %k0, %k0 +; AVX512DQ-NEXT: movw $-257, %ax ## imm = 0xFEFF +; AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; 
AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $8, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $9, %k0, %k2 -; AVX512DQ-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: korw %k1, %k2, %k1 +; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQ-NEXT: kshiftrw $7, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $7, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $7, %k0, %k0 +; AVX512DQ-NEXT: movw $-513, %ax ## imm = 0xFDFF +; AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 +; AVX512DQ-NEXT: kmovw %k1, %k5 +; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $9, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $10, %k0, %k2 -; AVX512DQ-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: korw %k1, %k2, %k1 +; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQ-NEXT: kshiftrw $6, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $6, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $6, %k0, %k0 +; AVX512DQ-NEXT: movw $-1025, %ax ## imm = 0xFBFF +; AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $10, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $11, %k0, %k2 -; AVX512DQ-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: korw %k1, %k2, %k1 +; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQ-NEXT: kshiftrw $5, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $5, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0 +; AVX512DQ-NEXT: movw $-2049, %ax ## imm = 0xF7FF +; AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; 
AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $11, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $12, %k0, %k2 -; AVX512DQ-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: korw %k1, %k2, %k1 +; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQ-NEXT: kshiftrw $4, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $4, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $4, %k0, %k0 +; AVX512DQ-NEXT: movw $-4097, %ax ## imm = 0xEFFF +; AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $12, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $13, %k0, %k2 -; AVX512DQ-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: korw %k1, %k2, %k1 +; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQ-NEXT: kshiftrw $3, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $3, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $3, %k0, %k0 +; AVX512DQ-NEXT: movw $-8193, %ax ## imm = 0xDFFF +; AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $13, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $14, %k0, %k2 -; AVX512DQ-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: korw %k1, %k2, %k1 +; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 +; AVX512DQ-NEXT: kshiftrw $2, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $2, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $2, %k0, %k2 -; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQ-NEXT: movw $-16385, %ax ## imm = 0xBFFF ; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $14, %k1, %k0 -; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 
2-byte Spill -; AVX512DQ-NEXT: korw %k0, %k1, %k0 -; AVX512DQ-NEXT: korw %k0, %k2, %k0 +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $14, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k0 -; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 +; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $1, %k2, %k2 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $14, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k1, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $14, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0 +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $2, %k2, %k2 -; AVX512DQ-NEXT: kmovw %k3, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: korw %k2, %k3, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $13, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $13, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $13, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; 
AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $3, %k2, %k2 -; AVX512DQ-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: korw %k2, %k4, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $12, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $12, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $12, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kandw %k6, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $4, %k2, %k2 -; AVX512DQ-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: korw %k2, %k5, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $11, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $11, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $11, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kandw %k3, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $5, %k2, %k2 -; AVX512DQ-NEXT: korw %k2, %k6, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $10, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $10, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $10, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k6, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $6, %k2, %k2 -; AVX512DQ-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: korw %k2, %k7, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $9, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $9, %k0, %k0 +; 
AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $9, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kandw %k4, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $7, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k1, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $8, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $8, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $8, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k3, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $8, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k1, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $7, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $7, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kandw %k5, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $9, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k1, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $6, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $6, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $6, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k4, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw 
%eax, %k2 -; AVX512DQ-NEXT: kshiftlw $10, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k1, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $5, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $5, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k5, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $11, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k1, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $4, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $4, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $4, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k7, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $12, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k1, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $3, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $3, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $3, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k7, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $13, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k1, %k2 -; AVX512DQ-NEXT: korw 
%k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $2, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $2, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $2, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k7, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $14, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k1, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $14, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k0 -; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k7, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $1, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k1, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $14, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $14, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; 
AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $2, %k2, %k2 -; AVX512DQ-NEXT: korw %k2, %k3, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $13, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $13, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $13, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $3, %k2, %k2 -; AVX512DQ-NEXT: korw %k2, %k4, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $12, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $12, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $12, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $4, %k2, %k2 -; AVX512DQ-NEXT: korw %k2, %k5, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $11, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $11, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $11, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $5, %k2, %k2 -; AVX512DQ-NEXT: korw %k2, %k6, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $10, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $10, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $10, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kandw %k6, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al 
-; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $6, %k2, %k2 -; AVX512DQ-NEXT: korw %k2, %k7, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $9, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $9, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $9, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $7, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k1, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $8, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $8, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $8, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kandw %k3, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $8, %k2, %k2 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $7, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k3, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $7, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $7, %k0, %k0 +; AVX512DQ-NEXT: kandw %k3, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $9, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k4, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $6, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $6, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw 
$6, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kandw %k4, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $10, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k5, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $5, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $5, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kandw %k5, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $11, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k7, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $4, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $4, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $4, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $12, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k7, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $3, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $3, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $3, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k5, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $13, %k2, %k2 -; AVX512DQ-NEXT: kmovw 
{{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k7, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $2, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $2, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $2, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k5, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $14, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k7, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k7 +; AVX512DQ-NEXT: kshiftlw $14, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2 -; AVX512DQ-NEXT: korw %k2, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kshiftlw $1, %k2, %k2 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k2, %k0, %k2 -; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $15, %k7, %k7 -; AVX512DQ-NEXT: korw %k2, %k7, %k2 +; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $2, %k7, %k7 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k7, %k0, %k7 -; AVX512DQ-NEXT: kshiftlw $14, %k2, %k2 -; AVX512DQ-NEXT: kshiftrw $14, %k2, %k2 -; AVX512DQ-NEXT: korw %k7, %k2, %k2 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 
## 2-byte Reload +; AVX512DQ-NEXT: kandw %k5, %k7, %k7 +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $14, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k7, %k6 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k5, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $3, %k7, %k7 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k7, %k0, %k7 -; AVX512DQ-NEXT: kshiftlw $13, %k2, %k2 -; AVX512DQ-NEXT: kshiftrw $13, %k2, %k2 -; AVX512DQ-NEXT: korw %k7, %k2, %k2 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $13, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k6, %k6 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k5, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $4, %k7, %k7 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k7, %k0, %k7 -; AVX512DQ-NEXT: kshiftlw $12, %k2, %k2 -; AVX512DQ-NEXT: kshiftrw $12, %k2, %k2 -; AVX512DQ-NEXT: korw %k7, %k2, %k2 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $12, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k6, %k6 +; AVX512DQ-NEXT: kandw %k1, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $5, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k6, %k7 -; AVX512DQ-NEXT: kshiftlw $11, %k2, %k2 -; AVX512DQ-NEXT: kshiftrw $11, %k2, %k2 -; AVX512DQ-NEXT: korw %k7, %k2, %k2 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $11, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k6, %k6 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k1, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: 
kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $6, %k7, %k7 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k7, %k0, %k7 -; AVX512DQ-NEXT: kshiftlw $10, %k2, %k2 -; AVX512DQ-NEXT: kshiftrw $10, %k2, %k2 -; AVX512DQ-NEXT: korw %k7, %k2, %k2 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $10, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k6, %k6 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k1, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $7, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k1, %k7 -; AVX512DQ-NEXT: kshiftlw $9, %k2, %k2 -; AVX512DQ-NEXT: kshiftrw $9, %k2, %k2 -; AVX512DQ-NEXT: korw %k7, %k2, %k2 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $9, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k6, %k6 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k1, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $8, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k3, %k7 -; AVX512DQ-NEXT: kshiftlw $8, %k2, %k2 -; AVX512DQ-NEXT: kshiftrw $8, %k2, %k2 -; AVX512DQ-NEXT: korw %k7, %k2, %k2 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $8, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k6, %k6 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k1, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $9, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k4, %k7 -; AVX512DQ-NEXT: kshiftlw $7, %k2, %k2 -; AVX512DQ-NEXT: kshiftrw $7, %k2, %k2 -; AVX512DQ-NEXT: korw %k7, %k2, %k2 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $7, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k6, %k6 +; AVX512DQ-NEXT: kandw %k3, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: 
kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $10, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k5, %k6 -; AVX512DQ-NEXT: kshiftlw $6, %k2, %k2 -; AVX512DQ-NEXT: kshiftrw $6, %k2, %k2 -; AVX512DQ-NEXT: korw %k6, %k2, %k2 +; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 +; AVX512DQ-NEXT: kshiftrw $6, %k7, %k7 +; AVX512DQ-NEXT: korw %k7, %k6, %k6 +; AVX512DQ-NEXT: kandw %k4, %k6, %k5 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kshiftlw $11, %k6, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k5, %k5 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k6, %k1, %k5 -; AVX512DQ-NEXT: kshiftlw $5, %k2, %k2 -; AVX512DQ-NEXT: kshiftrw $5, %k2, %k2 -; AVX512DQ-NEXT: korw %k5, %k2, %k2 +; AVX512DQ-NEXT: kandw %k1, %k5, %k4 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k5 -; AVX512DQ-NEXT: kshiftlw $12, %k5, %k5 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k5, %k1, %k4 -; AVX512DQ-NEXT: kshiftlw $4, %k2, %k2 -; AVX512DQ-NEXT: kshiftrw $4, %k2, %k2 -; AVX512DQ-NEXT: korw %k4, %k2, %k2 +; AVX512DQ-NEXT: kshiftlw $15, %k5, %k5 +; AVX512DQ-NEXT: kshiftrw $4, %k5, %k5 +; AVX512DQ-NEXT: korw %k5, %k4, %k4 +; AVX512DQ-NEXT: kandw %k2, %k4, %k3 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k4 -; AVX512DQ-NEXT: kshiftlw $13, %k4, %k4 +; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4 +; AVX512DQ-NEXT: kshiftrw $3, %k4, %k4 +; AVX512DQ-NEXT: korw %k4, %k3, %k3 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k4, %k1, %k3 -; AVX512DQ-NEXT: kshiftlw $3, %k2, %k2 -; AVX512DQ-NEXT: kshiftrw $3, %k2, %k2 -; AVX512DQ-NEXT: korw %k3, %k2, %k2 +; AVX512DQ-NEXT: kandw %k1, %k3, %k2 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k3 -; AVX512DQ-NEXT: kshiftlw $14, %k3, %k3 +; AVX512DQ-NEXT: 
kshiftlw $15, %k3, %k3 +; AVX512DQ-NEXT: kshiftrw $2, %k3, %k3 +; AVX512DQ-NEXT: korw %k3, %k2, %k2 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: korw %k3, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $2, %k2, %k2 -; AVX512DQ-NEXT: kshiftrw $2, %k2, %k2 -; AVX512DQ-NEXT: korw %k1, %k2, %k1 +; AVX512DQ-NEXT: kandw %k1, %k2, %k1 +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQ-NEXT: kmovw %eax, %k2 +; AVX512DQ-NEXT: kshiftlw $14, %k2, %k2 +; AVX512DQ-NEXT: korw %k2, %k1, %k1 ; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al @@ -3814,7 +3708,6 @@ ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2 ; AVX512DQ-NEXT: korw %k2, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, 6(%rdi) -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload ; AVX512DQ-NEXT: kmovw %k0, 4(%rdi) ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload ; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll --- a/llvm/test/CodeGen/X86/masked_store.ll +++ b/llvm/test/CodeGen/X86/masked_store.ll @@ -4913,26 +4913,22 @@ ; AVX512F-LABEL: widen_masked_store: ; AVX512F: ## %bb.0: ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: movw $-3, %ax +; AVX512F-NEXT: kmovw %eax, %k0 ; AVX512F-NEXT: andl $1, %esi -; AVX512F-NEXT: kmovw %esi, %k0 -; AVX512F-NEXT: kshiftrw $2, %k0, %k1 -; AVX512F-NEXT: kshiftlw $2, %k1, %k1 -; AVX512F-NEXT: kshiftlw $15, %k0, %k0 -; AVX512F-NEXT: kshiftrw $15, %k0, %k0 -; AVX512F-NEXT: kmovw %edx, %k2 -; AVX512F-NEXT: kshiftlw $15, %k2, %k2 -; AVX512F-NEXT: kshiftrw $14, %k2, %k2 -; AVX512F-NEXT: korw %k2, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: kshiftrw $3, %k0, %k1 -; AVX512F-NEXT: kshiftlw $3, %k1, %k1 -; AVX512F-NEXT: kshiftlw $14, %k0, %k0 -; AVX512F-NEXT: kshiftrw $14, %k0, %k0 +; AVX512F-NEXT: kmovw %esi, %k1 +; AVX512F-NEXT: kandw %k0, 
%k1, %k0 +; AVX512F-NEXT: kmovw %edx, %k1 +; AVX512F-NEXT: kshiftlw $15, %k1, %k1 +; AVX512F-NEXT: kshiftrw $14, %k1, %k1 ; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: movw $-5, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: kandw %k1, %k0, %k0 ; AVX512F-NEXT: kmovw %ecx, %k1 ; AVX512F-NEXT: kshiftlw $15, %k1, %k1 ; AVX512F-NEXT: kshiftrw $13, %k1, %k1 -; AVX512F-NEXT: korw %k0, %k1, %k0 +; AVX512F-NEXT: korw %k1, %k0, %k0 ; AVX512F-NEXT: kshiftlw $12, %k0, %k0 ; AVX512F-NEXT: kshiftrw $12, %k0, %k1 ; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} @@ -4941,52 +4937,44 @@ ; ; AVX512VLDQ-LABEL: widen_masked_store: ; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: kmovw %esi, %k0 -; AVX512VLDQ-NEXT: kshiftlb $7, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $7, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $2, %k0, %k1 -; AVX512VLDQ-NEXT: kshiftlb $2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $7, %k0, %k0 -; AVX512VLDQ-NEXT: kmovw %edx, %k2 -; AVX512VLDQ-NEXT: kshiftlb $7, %k2, %k2 -; AVX512VLDQ-NEXT: kshiftrb $6, %k2, %k2 -; AVX512VLDQ-NEXT: korb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: korb %k1, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $3, %k0, %k1 -; AVX512VLDQ-NEXT: kshiftlb $3, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $6, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k0 +; AVX512VLDQ-NEXT: movb $-3, %al +; AVX512VLDQ-NEXT: kmovw %eax, %k0 +; AVX512VLDQ-NEXT: kmovw %esi, %k1 +; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 +; AVX512VLDQ-NEXT: kshiftrb $7, %k1, %k1 +; AVX512VLDQ-NEXT: kandw %k0, %k1, %k0 +; AVX512VLDQ-NEXT: kmovw %edx, %k1 +; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 +; AVX512VLDQ-NEXT: kshiftrb $6, %k1, %k1 ; AVX512VLDQ-NEXT: korw %k1, %k0, %k0 +; AVX512VLDQ-NEXT: movb $-5, %al +; AVX512VLDQ-NEXT: kmovw %eax, %k1 +; AVX512VLDQ-NEXT: kandw %k1, %k0, %k0 ; AVX512VLDQ-NEXT: kmovw %ecx, %k1 ; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 ; AVX512VLDQ-NEXT: kshiftrb $5, %k1, %k1 -; AVX512VLDQ-NEXT: korw %k0, %k1, %k1 +; AVX512VLDQ-NEXT: korw %k1, %k0, 
%k1 ; AVX512VLDQ-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1} ; AVX512VLDQ-NEXT: retq ; ; AVX512VLBW-LABEL: widen_masked_store: ; AVX512VLBW: ## %bb.0: +; AVX512VLBW-NEXT: movw $-3, %ax +; AVX512VLBW-NEXT: kmovd %eax, %k0 ; AVX512VLBW-NEXT: andl $1, %esi -; AVX512VLBW-NEXT: kmovw %esi, %k0 -; AVX512VLBW-NEXT: kshiftrw $2, %k0, %k1 -; AVX512VLBW-NEXT: kshiftlw $2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512VLBW-NEXT: kshiftrw $15, %k0, %k0 -; AVX512VLBW-NEXT: kmovd %edx, %k2 -; AVX512VLBW-NEXT: kshiftlw $15, %k2, %k2 -; AVX512VLBW-NEXT: kshiftrw $14, %k2, %k2 -; AVX512VLBW-NEXT: korw %k2, %k1, %k1 -; AVX512VLBW-NEXT: korw %k1, %k0, %k0 -; AVX512VLBW-NEXT: kshiftrw $3, %k0, %k1 -; AVX512VLBW-NEXT: kshiftlw $3, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $14, %k0, %k0 -; AVX512VLBW-NEXT: kshiftrw $14, %k0, %k0 +; AVX512VLBW-NEXT: kmovw %esi, %k1 +; AVX512VLBW-NEXT: kandw %k0, %k1, %k0 +; AVX512VLBW-NEXT: kmovd %edx, %k1 +; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512VLBW-NEXT: kshiftrw $14, %k1, %k1 ; AVX512VLBW-NEXT: korw %k1, %k0, %k0 +; AVX512VLBW-NEXT: movw $-5, %ax +; AVX512VLBW-NEXT: kmovd %eax, %k1 +; AVX512VLBW-NEXT: kandw %k1, %k0, %k0 ; AVX512VLBW-NEXT: kmovd %ecx, %k1 ; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512VLBW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512VLBW-NEXT: korw %k0, %k1, %k1 +; AVX512VLBW-NEXT: korw %k1, %k0, %k1 ; AVX512VLBW-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1} ; AVX512VLBW-NEXT: retq call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> %v, <3 x i32>* %p, i32 16, <3 x i1> %mask) diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll --- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -1130,439 +1130,438 @@ ; CHECK-NEXT: vmovdqa (%rdi), %ymm1 ; CHECK-NEXT: vmovdqa 32(%rdi), %ymm0 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k0 -; CHECK-NEXT: kshiftrd $3, %k0, %k1 -; CHECK-NEXT: kshiftlq $2, %k0, %k2 -; CHECK-NEXT: kshiftlq $1, 
%k0, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $1, %k0, %k3 -; CHECK-NEXT: kshiftlq $63, %k3, %k3 -; CHECK-NEXT: kshiftrq $63, %k3, %k3 -; CHECK-NEXT: korq %k2, %k3, %k2 -; CHECK-NEXT: kshiftlq $3, %k0, %k3 -; CHECK-NEXT: kshiftlq $2, %k1, %k1 -; CHECK-NEXT: korq %k1, %k3, %k1 -; CHECK-NEXT: kshiftrd $2, %k0, %k3 -; CHECK-NEXT: kshiftlq $62, %k2, %k2 +; CHECK-NEXT: kshiftrd $1, %k0, %k1 +; CHECK-NEXT: movq $-3, %rax +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftlq $63, %k0, %k2 ; CHECK-NEXT: kshiftrq $62, %k2, %k2 -; CHECK-NEXT: korq %k1, %k2, %k1 -; CHECK-NEXT: kshiftlq $4, %k0, %k2 -; CHECK-NEXT: kshiftlq $3, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $5, %k0, %k3 -; CHECK-NEXT: kshiftlq $61, %k1, %k1 -; CHECK-NEXT: kshiftrq $61, %k1, %k1 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $5, %k0, %k2 -; CHECK-NEXT: kshiftlq $4, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $4, %k0, %k3 -; CHECK-NEXT: kshiftlq $60, %k1, %k1 -; CHECK-NEXT: kshiftrq $60, %k1, %k1 +; CHECK-NEXT: movq $-5, %rax +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $3, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $61, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $6, %k0, %k2 -; CHECK-NEXT: kshiftlq $5, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $7, %k0, %k3 -; CHECK-NEXT: kshiftlq $59, %k1, %k1 -; CHECK-NEXT: kshiftrq $59, %k1, %k1 +; CHECK-NEXT: movq $-9, %rax +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $2, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $60, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $7, %k0, %k2 -; CHECK-NEXT: kshiftlq $6, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $6, %k0, %k3 -; CHECK-NEXT: kshiftlq $58, %k1, %k1 -; CHECK-NEXT: 
kshiftrq $58, %k1, %k1 +; CHECK-NEXT: movq $-17, %rax +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $5, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $59, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $8, %k0, %k2 -; CHECK-NEXT: kshiftlq $7, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $9, %k0, %k3 -; CHECK-NEXT: kshiftlq $57, %k1, %k1 -; CHECK-NEXT: kshiftrq $57, %k1, %k1 +; CHECK-NEXT: movq $-33, %rax +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $4, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $58, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $9, %k0, %k2 -; CHECK-NEXT: kshiftlq $8, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $8, %k0, %k3 -; CHECK-NEXT: kshiftlq $56, %k1, %k1 -; CHECK-NEXT: kshiftrq $56, %k1, %k1 +; CHECK-NEXT: movq $-65, %rax +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $7, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $57, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $10, %k0, %k2 -; CHECK-NEXT: kshiftlq $9, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $11, %k0, %k3 -; CHECK-NEXT: kshiftlq $55, %k1, %k1 -; CHECK-NEXT: kshiftrq $55, %k1, %k1 +; CHECK-NEXT: movq $-129, %rax +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $6, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $56, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $11, %k0, %k2 -; CHECK-NEXT: kshiftlq $10, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $10, %k0, %k3 -; CHECK-NEXT: kshiftlq $54, %k1, %k1 -; CHECK-NEXT: kshiftrq $54, %k1, %k1 +; CHECK-NEXT: movq $-257, %rax # imm = 0xFEFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 
+; CHECK-NEXT: kshiftrd $9, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $55, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $12, %k0, %k2 -; CHECK-NEXT: kshiftlq $11, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $13, %k0, %k3 -; CHECK-NEXT: kshiftlq $53, %k1, %k1 -; CHECK-NEXT: kshiftrq $53, %k1, %k1 +; CHECK-NEXT: movq $-513, %rax # imm = 0xFDFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $8, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $54, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $13, %k0, %k2 -; CHECK-NEXT: kshiftlq $12, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $12, %k0, %k3 -; CHECK-NEXT: kshiftlq $52, %k1, %k1 -; CHECK-NEXT: kshiftrq $52, %k1, %k1 +; CHECK-NEXT: movq $-1025, %rax # imm = 0xFBFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $11, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $53, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $14, %k0, %k2 -; CHECK-NEXT: kshiftlq $13, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $15, %k0, %k3 -; CHECK-NEXT: kshiftlq $51, %k1, %k1 -; CHECK-NEXT: kshiftrq $51, %k1, %k1 +; CHECK-NEXT: movq $-2049, %rax # imm = 0xF7FF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $10, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $52, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $15, %k0, %k2 -; CHECK-NEXT: kshiftlq $14, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $14, %k0, %k3 -; CHECK-NEXT: kshiftlq $50, %k1, %k1 -; CHECK-NEXT: kshiftrq $50, %k1, %k1 +; CHECK-NEXT: movq $-4097, %rax # imm = 0xEFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $13, %k0, %k2 +; CHECK-NEXT: kshiftlq 
$63, %k2, %k2 +; CHECK-NEXT: kshiftrq $51, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $16, %k0, %k2 -; CHECK-NEXT: kshiftlq $15, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $17, %k0, %k3 -; CHECK-NEXT: kshiftlq $49, %k1, %k1 -; CHECK-NEXT: kshiftrq $49, %k1, %k1 +; CHECK-NEXT: movq $-8193, %rax # imm = 0xDFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $12, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $50, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $17, %k0, %k2 -; CHECK-NEXT: kshiftlq $16, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $16, %k0, %k3 -; CHECK-NEXT: kshiftlq $48, %k1, %k1 -; CHECK-NEXT: kshiftrq $48, %k1, %k1 +; CHECK-NEXT: movq $-16385, %rax # imm = 0xBFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $15, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $49, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $18, %k0, %k2 -; CHECK-NEXT: kshiftlq $17, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $19, %k0, %k3 -; CHECK-NEXT: kshiftlq $47, %k1, %k1 -; CHECK-NEXT: kshiftrq $47, %k1, %k1 +; CHECK-NEXT: movq $-32769, %rax # imm = 0xFFFF7FFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $14, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $48, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $19, %k0, %k2 -; CHECK-NEXT: kshiftlq $18, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $18, %k0, %k3 -; CHECK-NEXT: kshiftlq $46, %k1, %k1 -; CHECK-NEXT: kshiftrq $46, %k1, %k1 +; CHECK-NEXT: movq $-65537, %rax # imm = 0xFFFEFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $17, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $47, %k2, 
%k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $20, %k0, %k2 -; CHECK-NEXT: kshiftlq $19, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $21, %k0, %k3 -; CHECK-NEXT: kshiftlq $45, %k1, %k1 -; CHECK-NEXT: kshiftrq $45, %k1, %k1 +; CHECK-NEXT: movq $-131073, %rax # imm = 0xFFFDFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $16, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $46, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $21, %k0, %k2 -; CHECK-NEXT: kshiftlq $20, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $20, %k0, %k3 -; CHECK-NEXT: kshiftlq $44, %k1, %k1 -; CHECK-NEXT: kshiftrq $44, %k1, %k1 +; CHECK-NEXT: movq $-262145, %rax # imm = 0xFFFBFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $19, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $45, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $22, %k0, %k2 -; CHECK-NEXT: kshiftlq $21, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $23, %k0, %k3 -; CHECK-NEXT: kshiftlq $43, %k1, %k1 -; CHECK-NEXT: kshiftrq $43, %k1, %k1 +; CHECK-NEXT: movq $-524289, %rax # imm = 0xFFF7FFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $18, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $44, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $23, %k0, %k2 -; CHECK-NEXT: kshiftlq $22, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $22, %k0, %k3 -; CHECK-NEXT: kshiftlq $42, %k1, %k1 -; CHECK-NEXT: kshiftrq $42, %k1, %k1 +; CHECK-NEXT: movq $-1048577, %rax # imm = 0xFFEFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $21, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $43, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, 
%k1 -; CHECK-NEXT: kshiftlq $24, %k0, %k2 -; CHECK-NEXT: kshiftlq $23, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $25, %k0, %k3 -; CHECK-NEXT: kshiftlq $41, %k1, %k1 -; CHECK-NEXT: kshiftrq $41, %k1, %k1 +; CHECK-NEXT: movq $-2097153, %rax # imm = 0xFFDFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $20, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $42, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $25, %k0, %k2 -; CHECK-NEXT: kshiftlq $24, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $24, %k0, %k3 -; CHECK-NEXT: kshiftlq $40, %k1, %k1 -; CHECK-NEXT: kshiftrq $40, %k1, %k1 +; CHECK-NEXT: movq $-4194305, %rax # imm = 0xFFBFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $23, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $41, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $26, %k0, %k2 -; CHECK-NEXT: kshiftlq $25, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $27, %k0, %k3 -; CHECK-NEXT: kshiftlq $39, %k1, %k1 -; CHECK-NEXT: kshiftrq $39, %k1, %k1 +; CHECK-NEXT: movq $-8388609, %rax # imm = 0xFF7FFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $22, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $40, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $27, %k0, %k2 -; CHECK-NEXT: kshiftlq $26, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $26, %k0, %k3 -; CHECK-NEXT: kshiftlq $38, %k1, %k1 -; CHECK-NEXT: kshiftrq $38, %k1, %k1 +; CHECK-NEXT: movq $-16777217, %rax # imm = 0xFEFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $25, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $39, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq 
$28, %k0, %k2 -; CHECK-NEXT: kshiftlq $27, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $29, %k0, %k3 -; CHECK-NEXT: kshiftlq $37, %k1, %k1 -; CHECK-NEXT: kshiftrq $37, %k1, %k1 +; CHECK-NEXT: movq $-33554433, %rax # imm = 0xFDFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $24, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $38, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $29, %k0, %k2 -; CHECK-NEXT: kshiftlq $28, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $28, %k0, %k3 -; CHECK-NEXT: kshiftlq $36, %k1, %k1 -; CHECK-NEXT: kshiftrq $36, %k1, %k1 +; CHECK-NEXT: movq $-67108865, %rax # imm = 0xFBFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $27, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $37, %k2, %k2 ; CHECK-NEXT: korq %k2, %k1, %k1 -; CHECK-NEXT: kshiftlq $30, %k0, %k2 -; CHECK-NEXT: kshiftlq $29, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $31, %k0, %k3 -; CHECK-NEXT: kshiftlq $35, %k1, %k1 -; CHECK-NEXT: kshiftrq $35, %k1, %k1 -; CHECK-NEXT: korq %k2, %k1, %k2 -; CHECK-NEXT: kshiftlq $31, %k0, %k1 -; CHECK-NEXT: kshiftlq $30, %k3, %k3 -; CHECK-NEXT: korq %k3, %k1, %k3 -; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1 -; CHECK-NEXT: kshiftrd $30, %k0, %k0 -; CHECK-NEXT: kshiftlq $34, %k2, %k2 +; CHECK-NEXT: movq $-134217729, %rax # imm = 0xF7FFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $26, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $36, %k2, %k2 +; CHECK-NEXT: korq %k2, %k1, %k1 +; CHECK-NEXT: movq $-268435457, %rax # imm = 0xEFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $29, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $35, %k2, %k2 +; CHECK-NEXT: korq %k2, %k1, %k1 +; 
CHECK-NEXT: movq $-536870913, %rax # imm = 0xDFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $28, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 ; CHECK-NEXT: kshiftrq $34, %k2, %k2 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftlq $32, %k0, %k3 -; CHECK-NEXT: kshiftlq $31, %k0, %k0 -; CHECK-NEXT: korq %k0, %k3, %k0 -; CHECK-NEXT: kshiftrd $1, %k1, %k3 -; CHECK-NEXT: kshiftlq $33, %k2, %k2 +; CHECK-NEXT: korq %k2, %k1, %k1 +; CHECK-NEXT: movq $-1073741825, %rax # imm = 0xBFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k1 +; CHECK-NEXT: kshiftrd $31, %k0, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 ; CHECK-NEXT: kshiftrq $33, %k2, %k2 -; CHECK-NEXT: korq %k0, %k2, %k0 -; CHECK-NEXT: kshiftlq $32, %k0, %k0 +; CHECK-NEXT: korq %k2, %k1, %k1 +; CHECK-NEXT: movabsq $-2147483649, %rax # imm = 0xFFFFFFFF7FFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k1, %k2 +; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1 +; CHECK-NEXT: kshiftrd $30, %k0, %k0 +; CHECK-NEXT: kshiftlq $63, %k0, %k0 ; CHECK-NEXT: kshiftrq $32, %k0, %k0 -; CHECK-NEXT: kshiftlq $33, %k0, %k2 -; CHECK-NEXT: kshiftlq $32, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 +; CHECK-NEXT: korq %k0, %k2, %k0 +; CHECK-NEXT: movabsq $-4294967297, %rax # imm = 0xFFFFFFFEFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $1, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $31, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $34, %k0, %k2 -; CHECK-NEXT: kshiftlq $33, %k1, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $3, %k1, %k3 -; CHECK-NEXT: kshiftlq $31, %k0, %k0 -; CHECK-NEXT: kshiftrq $31, %k0, %k0 +; CHECK-NEXT: movabsq $-8589934593, %rax # imm = 0xFFFFFFFDFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftlq $63, %k1, %k2 +; CHECK-NEXT: kshiftrq $30, %k2, %k2 ; CHECK-NEXT: korq 
%k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $35, %k0, %k2 -; CHECK-NEXT: kshiftlq $34, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $2, %k1, %k3 -; CHECK-NEXT: kshiftlq $30, %k0, %k0 -; CHECK-NEXT: kshiftrq $30, %k0, %k0 +; CHECK-NEXT: movabsq $-17179869185, %rax # imm = 0xFFFFFFFBFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $3, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $29, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $36, %k0, %k2 -; CHECK-NEXT: kshiftlq $35, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $5, %k1, %k3 -; CHECK-NEXT: kshiftlq $29, %k0, %k0 -; CHECK-NEXT: kshiftrq $29, %k0, %k0 +; CHECK-NEXT: movabsq $-34359738369, %rax # imm = 0xFFFFFFF7FFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $2, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $28, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $37, %k0, %k2 -; CHECK-NEXT: kshiftlq $36, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $4, %k1, %k3 -; CHECK-NEXT: kshiftlq $28, %k0, %k0 -; CHECK-NEXT: kshiftrq $28, %k0, %k0 +; CHECK-NEXT: movabsq $-68719476737, %rax # imm = 0xFFFFFFEFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $5, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $27, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $38, %k0, %k2 -; CHECK-NEXT: kshiftlq $37, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $7, %k1, %k3 -; CHECK-NEXT: kshiftlq $27, %k0, %k0 -; CHECK-NEXT: kshiftrq $27, %k0, %k0 +; CHECK-NEXT: movabsq $-137438953473, %rax # imm = 0xFFFFFFDFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $4, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $26, %k2, 
%k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $39, %k0, %k2 -; CHECK-NEXT: kshiftlq $38, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $6, %k1, %k3 -; CHECK-NEXT: kshiftlq $26, %k0, %k0 -; CHECK-NEXT: kshiftrq $26, %k0, %k0 +; CHECK-NEXT: movabsq $-274877906945, %rax # imm = 0xFFFFFFBFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $7, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $25, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $40, %k0, %k2 -; CHECK-NEXT: kshiftlq $39, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $9, %k1, %k3 -; CHECK-NEXT: kshiftlq $25, %k0, %k0 -; CHECK-NEXT: kshiftrq $25, %k0, %k0 +; CHECK-NEXT: movabsq $-549755813889, %rax # imm = 0xFFFFFF7FFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $6, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $24, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $41, %k0, %k2 -; CHECK-NEXT: kshiftlq $40, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $8, %k1, %k3 -; CHECK-NEXT: kshiftlq $24, %k0, %k0 -; CHECK-NEXT: kshiftrq $24, %k0, %k0 +; CHECK-NEXT: movabsq $-1099511627777, %rax # imm = 0xFFFFFEFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $9, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $23, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $42, %k0, %k2 -; CHECK-NEXT: kshiftlq $41, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $11, %k1, %k3 -; CHECK-NEXT: kshiftlq $23, %k0, %k0 -; CHECK-NEXT: kshiftrq $23, %k0, %k0 +; CHECK-NEXT: movabsq $-2199023255553, %rax # imm = 0xFFFFFDFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $8, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; 
CHECK-NEXT: kshiftrq $22, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $43, %k0, %k2 -; CHECK-NEXT: kshiftlq $42, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $10, %k1, %k3 -; CHECK-NEXT: kshiftlq $22, %k0, %k0 -; CHECK-NEXT: kshiftrq $22, %k0, %k0 +; CHECK-NEXT: movabsq $-4398046511105, %rax # imm = 0xFFFFFBFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $11, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $21, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $44, %k0, %k2 -; CHECK-NEXT: kshiftlq $43, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $13, %k1, %k3 -; CHECK-NEXT: kshiftlq $21, %k0, %k0 -; CHECK-NEXT: kshiftrq $21, %k0, %k0 +; CHECK-NEXT: movabsq $-8796093022209, %rax # imm = 0xFFFFF7FFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $10, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $20, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $45, %k0, %k2 -; CHECK-NEXT: kshiftlq $44, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $12, %k1, %k3 -; CHECK-NEXT: kshiftlq $20, %k0, %k0 -; CHECK-NEXT: kshiftrq $20, %k0, %k0 +; CHECK-NEXT: movabsq $-17592186044417, %rax # imm = 0xFFFFEFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $13, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $19, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $46, %k0, %k2 -; CHECK-NEXT: kshiftlq $45, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $15, %k1, %k3 -; CHECK-NEXT: kshiftlq $19, %k0, %k0 -; CHECK-NEXT: kshiftrq $19, %k0, %k0 +; CHECK-NEXT: movabsq $-35184372088833, %rax # imm = 0xFFFFDFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $12, %k1, %k2 +; 
CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $18, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $47, %k0, %k2 -; CHECK-NEXT: kshiftlq $46, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $14, %k1, %k3 -; CHECK-NEXT: kshiftlq $18, %k0, %k0 -; CHECK-NEXT: kshiftrq $18, %k0, %k0 +; CHECK-NEXT: movabsq $-70368744177665, %rax # imm = 0xFFFFBFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $15, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $17, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $48, %k0, %k2 -; CHECK-NEXT: kshiftlq $47, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $17, %k1, %k3 -; CHECK-NEXT: kshiftlq $17, %k0, %k0 -; CHECK-NEXT: kshiftrq $17, %k0, %k0 +; CHECK-NEXT: movabsq $-140737488355329, %rax # imm = 0xFFFF7FFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $14, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $16, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $49, %k0, %k2 -; CHECK-NEXT: kshiftlq $48, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $16, %k1, %k3 -; CHECK-NEXT: kshiftlq $16, %k0, %k0 -; CHECK-NEXT: kshiftrq $16, %k0, %k0 +; CHECK-NEXT: movabsq $-281474976710657, %rax # imm = 0xFFFEFFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $17, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $15, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $50, %k0, %k2 -; CHECK-NEXT: kshiftlq $49, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $19, %k1, %k3 -; CHECK-NEXT: kshiftlq $15, %k0, %k0 -; CHECK-NEXT: kshiftrq $15, %k0, %k0 +; CHECK-NEXT: movabsq $-562949953421313, %rax # imm = 0xFFFDFFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, 
%k0 +; CHECK-NEXT: kshiftrd $16, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $14, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $51, %k0, %k2 -; CHECK-NEXT: kshiftlq $50, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $18, %k1, %k3 -; CHECK-NEXT: kshiftlq $14, %k0, %k0 -; CHECK-NEXT: kshiftrq $14, %k0, %k0 +; CHECK-NEXT: movabsq $-1125899906842625, %rax # imm = 0xFFFBFFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $19, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $13, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $52, %k0, %k2 -; CHECK-NEXT: kshiftlq $51, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $21, %k1, %k3 -; CHECK-NEXT: kshiftlq $13, %k0, %k0 -; CHECK-NEXT: kshiftrq $13, %k0, %k0 +; CHECK-NEXT: movabsq $-2251799813685249, %rax # imm = 0xFFF7FFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $18, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $12, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $53, %k0, %k2 -; CHECK-NEXT: kshiftlq $52, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $20, %k1, %k3 -; CHECK-NEXT: kshiftlq $12, %k0, %k0 -; CHECK-NEXT: kshiftrq $12, %k0, %k0 +; CHECK-NEXT: movabsq $-4503599627370497, %rax # imm = 0xFFEFFFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $21, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $11, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $54, %k0, %k2 -; CHECK-NEXT: kshiftlq $53, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $23, %k1, %k3 -; CHECK-NEXT: kshiftlq $11, %k0, %k0 -; CHECK-NEXT: kshiftrq $11, %k0, %k0 +; CHECK-NEXT: movabsq $-9007199254740993, %rax # imm = 0xFFDFFFFFFFFFFFFF +; 
CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $20, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $10, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $55, %k0, %k2 -; CHECK-NEXT: kshiftlq $54, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $22, %k1, %k3 -; CHECK-NEXT: kshiftlq $10, %k0, %k0 -; CHECK-NEXT: kshiftrq $10, %k0, %k0 +; CHECK-NEXT: movabsq $-18014398509481985, %rax # imm = 0xFFBFFFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $23, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $9, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $56, %k0, %k2 -; CHECK-NEXT: kshiftlq $55, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $25, %k1, %k3 -; CHECK-NEXT: kshiftlq $9, %k0, %k0 -; CHECK-NEXT: kshiftrq $9, %k0, %k0 +; CHECK-NEXT: movabsq $-36028797018963969, %rax # imm = 0xFF7FFFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $22, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $8, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $57, %k0, %k2 -; CHECK-NEXT: kshiftlq $56, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $24, %k1, %k3 -; CHECK-NEXT: kshiftlq $8, %k0, %k0 -; CHECK-NEXT: kshiftrq $8, %k0, %k0 +; CHECK-NEXT: movabsq $-72057594037927937, %rax # imm = 0xFEFFFFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $25, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $7, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $58, %k0, %k2 -; CHECK-NEXT: kshiftlq $57, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $27, %k1, %k3 -; CHECK-NEXT: kshiftlq $7, %k0, %k0 -; CHECK-NEXT: kshiftrq $7, %k0, %k0 +; CHECK-NEXT: movabsq 
$-144115188075855873, %rax # imm = 0xFDFFFFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $24, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $6, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $59, %k0, %k2 -; CHECK-NEXT: kshiftlq $58, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $26, %k1, %k3 -; CHECK-NEXT: kshiftlq $6, %k0, %k0 -; CHECK-NEXT: kshiftrq $6, %k0, %k0 +; CHECK-NEXT: movabsq $-288230376151711745, %rax # imm = 0xFBFFFFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $27, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $5, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $60, %k0, %k2 -; CHECK-NEXT: kshiftlq $59, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $29, %k1, %k3 -; CHECK-NEXT: kshiftlq $5, %k0, %k0 -; CHECK-NEXT: kshiftrq $5, %k0, %k0 +; CHECK-NEXT: movabsq $-576460752303423489, %rax # imm = 0xF7FFFFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $26, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $4, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $61, %k0, %k2 -; CHECK-NEXT: kshiftlq $60, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $28, %k1, %k3 -; CHECK-NEXT: kshiftlq $4, %k0, %k0 -; CHECK-NEXT: kshiftrq $4, %k0, %k0 +; CHECK-NEXT: movabsq $-1152921504606846977, %rax # imm = 0xEFFFFFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $29, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $3, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $62, %k0, %k2 -; CHECK-NEXT: kshiftlq $61, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftrd $31, %k1, %k3 -; CHECK-NEXT: kshiftlq $3, %k0, %k0 -; CHECK-NEXT: 
kshiftrq $3, %k0, %k0 +; CHECK-NEXT: movabsq $-2305843009213693953, %rax # imm = 0xDFFFFFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $28, %k1, %k2 +; CHECK-NEXT: kshiftlq $63, %k2, %k2 +; CHECK-NEXT: kshiftrq $2, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 -; CHECK-NEXT: kshiftlq $63, %k0, %k2 -; CHECK-NEXT: kshiftlq $62, %k3, %k3 -; CHECK-NEXT: korq %k3, %k2, %k2 -; CHECK-NEXT: kshiftlq $2, %k0, %k0 -; CHECK-NEXT: kshiftrq $2, %k0, %k0 +; CHECK-NEXT: movabsq $-4611686018427387905, %rax # imm = 0xBFFFFFFFFFFFFFFF +; CHECK-NEXT: kmovq %rax, %k2 +; CHECK-NEXT: kandq %k2, %k0, %k0 +; CHECK-NEXT: kshiftrd $31, %k1, %k2 +; CHECK-NEXT: kshiftlq $62, %k2, %k2 ; CHECK-NEXT: korq %k2, %k0, %k0 ; CHECK-NEXT: kshiftrd $30, %k1, %k1 ; CHECK-NEXT: kshiftlq $1, %k0, %k0 diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll --- a/llvm/test/CodeGen/X86/vec_smulo.ll +++ b/llvm/test/CodeGen/X86/vec_smulo.ll @@ -1730,25 +1730,24 @@ ; ; AVX512-LABEL: smulo_v2i64: ; AVX512: # %bb.0: -; AVX512-NEXT: vpextrq $1, %xmm1, %rax -; AVX512-NEXT: vpextrq $1, %xmm0, %rcx -; AVX512-NEXT: vmovq %xmm1, %rdx -; AVX512-NEXT: vmovq %xmm0, %rsi +; AVX512-NEXT: vmovq %xmm1, %rax +; AVX512-NEXT: vmovq %xmm0, %rcx +; AVX512-NEXT: vpextrq $1, %xmm1, %rdx +; AVX512-NEXT: vpextrq $1, %xmm0, %rsi ; AVX512-NEXT: imulq %rdx, %rsi ; AVX512-NEXT: seto %dl +; AVX512-NEXT: vmovq %rsi, %xmm0 ; AVX512-NEXT: imulq %rax, %rcx -; AVX512-NEXT: vmovq %rcx, %xmm0 -; AVX512-NEXT: vmovq %rsi, %xmm1 +; AVX512-NEXT: vmovq %rcx, %xmm1 ; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; AVX512-NEXT: seto %al -; AVX512-NEXT: kmovd %eax, %k0 -; AVX512-NEXT: kshiftlw $15, %k0, %k0 -; AVX512-NEXT: kshiftrw $14, %k0, %k0 +; AVX512-NEXT: movw $-3, %cx +; AVX512-NEXT: kmovd %ecx, %k0 +; AVX512-NEXT: kmovd %eax, %k1 +; AVX512-NEXT: kandw %k0, %k1, %k0 ; AVX512-NEXT: kmovd %edx, %k1 ; AVX512-NEXT: kshiftlw $15, %k1, %k1 -; AVX512-NEXT: kshiftrw 
$15, %k1, %k1 -; AVX512-NEXT: kshiftlw $2, %k0, %k2 -; AVX512-NEXT: korw %k2, %k1, %k1 +; AVX512-NEXT: kshiftrw $14, %k1, %k1 ; AVX512-NEXT: korw %k1, %k0, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} @@ -2249,10 +2248,10 @@ ; AVX512-NEXT: setne %cl ; AVX512-NEXT: orb %al, %cl ; AVX512-NEXT: setne %al +; AVX512-NEXT: kmovd %eax, %k1 +; AVX512-NEXT: movw $-3, %ax ; AVX512-NEXT: kmovd %eax, %k0 -; AVX512-NEXT: kshiftlw $15, %k0, %k0 -; AVX512-NEXT: kshiftrw $15, %k0, %k1 -; AVX512-NEXT: kshiftlw $2, %k0, %k0 +; AVX512-NEXT: kandw %k0, %k1, %k1 ; AVX512-NEXT: movl %edx, %eax ; AVX512-NEXT: imulb %sil ; AVX512-NEXT: movl %eax, %edx @@ -2265,12 +2264,12 @@ ; AVX512-NEXT: orb %al, %cl ; AVX512-NEXT: setne %al ; AVX512-NEXT: kmovd %eax, %k2 -; AVX512-NEXT: kshiftlw $1, %k2, %k2 -; AVX512-NEXT: korw %k2, %k0, %k2 -; AVX512-NEXT: korw %k2, %k1, %k1 -; AVX512-NEXT: kshiftlw $14, %k1, %k1 -; AVX512-NEXT: kshiftrw $14, %k1, %k1 -; AVX512-NEXT: kshiftlw $3, %k0, %k2 +; AVX512-NEXT: kshiftlw $15, %k2, %k2 +; AVX512-NEXT: kshiftrw $14, %k2, %k2 +; AVX512-NEXT: korw %k2, %k1, %k2 +; AVX512-NEXT: movw $-5, %ax +; AVX512-NEXT: kmovd %eax, %k1 +; AVX512-NEXT: kandw %k1, %k2, %k2 ; AVX512-NEXT: movl %r11d, %eax ; AVX512-NEXT: imulb %bl ; AVX512-NEXT: movl %eax, %esi @@ -2285,9 +2284,8 @@ ; AVX512-NEXT: kmovd %eax, %k3 ; AVX512-NEXT: kshiftlw $2, %k3, %k3 ; AVX512-NEXT: korw %k3, %k2, %k2 -; AVX512-NEXT: korw %k2, %k1, %k1 -; AVX512-NEXT: kshiftlw $13, %k1, %k1 -; AVX512-NEXT: kshiftrw $13, %k1, %k1 +; AVX512-NEXT: kshiftlw $13, %k2, %k2 +; AVX512-NEXT: kshiftrw $13, %k2, %k2 ; AVX512-NEXT: movl %r10d, %eax ; AVX512-NEXT: imulb %r9b ; AVX512-NEXT: # kill: def $al killed $al def $eax @@ -2299,37 +2297,29 @@ ; AVX512-NEXT: setne %bl ; AVX512-NEXT: orb %cl, %bl ; AVX512-NEXT: setne %cl -; AVX512-NEXT: kmovd %ecx, %k2 -; AVX512-NEXT: kshiftlw $3, %k2, %k2 -; AVX512-NEXT: korw %k2, %k1, %k1 +; AVX512-NEXT: kmovd %ecx, %k3 +; AVX512-NEXT: 
kshiftlw $3, %k3, %k3 +; AVX512-NEXT: korw %k3, %k2, %k2 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: kmovd %r8d, %k1 -; AVX512-NEXT: kshiftlw $15, %k1, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k2} {z} +; AVX512-NEXT: kmovd %r8d, %k2 +; AVX512-NEXT: kandw %k0, %k2, %k0 ; AVX512-NEXT: kmovd %edx, %k2 ; AVX512-NEXT: kshiftlw $15, %k2, %k2 ; AVX512-NEXT: kshiftrw $14, %k2, %k2 ; AVX512-NEXT: korw %k2, %k0, %k0 -; AVX512-NEXT: korw %k0, %k1, %k0 -; AVX512-NEXT: kshiftrw $3, %k0, %k1 -; AVX512-NEXT: kshiftlw $3, %k1, %k1 -; AVX512-NEXT: kshiftlw $14, %k0, %k0 -; AVX512-NEXT: kshiftrw $14, %k0, %k0 -; AVX512-NEXT: kmovd %esi, %k2 -; AVX512-NEXT: kshiftlw $15, %k2, %k2 -; AVX512-NEXT: kshiftrw $13, %k2, %k2 -; AVX512-NEXT: korw %k2, %k1, %k1 -; AVX512-NEXT: korw %k1, %k0, %k0 -; AVX512-NEXT: kshiftrw $4, %k0, %k1 -; AVX512-NEXT: kshiftlw $4, %k1, %k1 -; AVX512-NEXT: kshiftlw $13, %k0, %k0 -; AVX512-NEXT: kshiftrw $13, %k0, %k0 +; AVX512-NEXT: kandw %k1, %k0, %k0 +; AVX512-NEXT: kmovd %esi, %k1 +; AVX512-NEXT: kshiftlw $15, %k1, %k1 +; AVX512-NEXT: kshiftrw $13, %k1, %k1 ; AVX512-NEXT: korw %k1, %k0, %k0 +; AVX512-NEXT: movw $-9, %cx +; AVX512-NEXT: kmovd %ecx, %k1 +; AVX512-NEXT: kandw %k1, %k0, %k0 ; AVX512-NEXT: kmovd %eax, %k1 ; AVX512-NEXT: kshiftlw $15, %k1, %k1 ; AVX512-NEXT: kshiftrw $12, %k1, %k1 -; AVX512-NEXT: korw %k0, %k1, %k0 +; AVX512-NEXT: korw %k1, %k0, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: movb %al, (%rdi) ; AVX512-NEXT: popq %rbx diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll --- a/llvm/test/CodeGen/X86/vec_umulo.ll +++ b/llvm/test/CodeGen/X86/vec_umulo.ll @@ -1528,27 +1528,25 @@ ; ; AVX512-LABEL: umulo_v2i64: ; AVX512: # %bb.0: -; AVX512-NEXT: vpextrq $1, %xmm0, %rcx -; AVX512-NEXT: vpextrq $1, %xmm1, %r8 -; AVX512-NEXT: vmovq %xmm0, %rax -; AVX512-NEXT: vmovq %xmm1, %rdx +; AVX512-NEXT: vmovq 
%xmm0, %rcx +; AVX512-NEXT: vmovq %xmm1, %rsi +; AVX512-NEXT: vpextrq $1, %xmm0, %rax +; AVX512-NEXT: vpextrq $1, %xmm1, %rdx ; AVX512-NEXT: mulq %rdx -; AVX512-NEXT: movq %rax, %rsi -; AVX512-NEXT: seto %r9b -; AVX512-NEXT: movq %rcx, %rax -; AVX512-NEXT: mulq %r8 +; AVX512-NEXT: seto %r8b ; AVX512-NEXT: vmovq %rax, %xmm0 -; AVX512-NEXT: vmovq %rsi, %xmm1 +; AVX512-NEXT: movq %rcx, %rax +; AVX512-NEXT: mulq %rsi +; AVX512-NEXT: vmovq %rax, %xmm1 ; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; AVX512-NEXT: seto %al -; AVX512-NEXT: kmovd %eax, %k0 -; AVX512-NEXT: kshiftlw $15, %k0, %k0 -; AVX512-NEXT: kshiftrw $14, %k0, %k0 -; AVX512-NEXT: kmovd %r9d, %k1 +; AVX512-NEXT: movw $-3, %cx +; AVX512-NEXT: kmovd %ecx, %k0 +; AVX512-NEXT: kmovd %eax, %k1 +; AVX512-NEXT: kandw %k0, %k1, %k0 +; AVX512-NEXT: kmovd %r8d, %k1 ; AVX512-NEXT: kshiftlw $15, %k1, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kshiftlw $2, %k0, %k2 -; AVX512-NEXT: korw %k2, %k1, %k1 +; AVX512-NEXT: kshiftrw $14, %k1, %k1 ; AVX512-NEXT: korw %k1, %k0, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} @@ -1983,10 +1981,10 @@ ; AVX512-NEXT: setne %cl ; AVX512-NEXT: orb %al, %cl ; AVX512-NEXT: setne %al +; AVX512-NEXT: kmovd %eax, %k1 +; AVX512-NEXT: movw $-3, %ax ; AVX512-NEXT: kmovd %eax, %k0 -; AVX512-NEXT: kshiftlw $15, %k0, %k0 -; AVX512-NEXT: kshiftrw $15, %k0, %k1 -; AVX512-NEXT: kshiftlw $2, %k0, %k0 +; AVX512-NEXT: kandw %k0, %k1, %k1 ; AVX512-NEXT: movl %edx, %eax ; AVX512-NEXT: mulb %sil ; AVX512-NEXT: movl %eax, %edx @@ -1996,12 +1994,12 @@ ; AVX512-NEXT: orb %al, %cl ; AVX512-NEXT: setne %al ; AVX512-NEXT: kmovd %eax, %k2 -; AVX512-NEXT: kshiftlw $1, %k2, %k2 -; AVX512-NEXT: korw %k2, %k0, %k2 -; AVX512-NEXT: korw %k2, %k1, %k1 -; AVX512-NEXT: kshiftlw $14, %k1, %k1 -; AVX512-NEXT: kshiftrw $14, %k1, %k1 -; AVX512-NEXT: kshiftlw $3, %k0, %k2 +; AVX512-NEXT: kshiftlw $15, %k2, %k2 +; AVX512-NEXT: kshiftrw $14, %k2, %k2 +; 
AVX512-NEXT: korw %k2, %k1, %k2 +; AVX512-NEXT: movw $-5, %ax +; AVX512-NEXT: kmovd %eax, %k1 +; AVX512-NEXT: kandw %k1, %k2, %k2 ; AVX512-NEXT: movl %r11d, %eax ; AVX512-NEXT: mulb %bl ; AVX512-NEXT: movl %eax, %esi @@ -2013,9 +2011,8 @@ ; AVX512-NEXT: kmovd %eax, %k3 ; AVX512-NEXT: kshiftlw $2, %k3, %k3 ; AVX512-NEXT: korw %k3, %k2, %k2 -; AVX512-NEXT: korw %k2, %k1, %k1 -; AVX512-NEXT: kshiftlw $13, %k1, %k1 -; AVX512-NEXT: kshiftrw $13, %k1, %k1 +; AVX512-NEXT: kshiftlw $13, %k2, %k2 +; AVX512-NEXT: kshiftrw $13, %k2, %k2 ; AVX512-NEXT: movl %r9d, %eax ; AVX512-NEXT: mulb %r10b ; AVX512-NEXT: # kill: def $al killed $al def $eax @@ -2024,37 +2021,29 @@ ; AVX512-NEXT: setne %bl ; AVX512-NEXT: orb %cl, %bl ; AVX512-NEXT: setne %cl -; AVX512-NEXT: kmovd %ecx, %k2 -; AVX512-NEXT: kshiftlw $3, %k2, %k2 -; AVX512-NEXT: korw %k2, %k1, %k1 +; AVX512-NEXT: kmovd %ecx, %k3 +; AVX512-NEXT: kshiftlw $3, %k3, %k3 +; AVX512-NEXT: korw %k3, %k2, %k2 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: kmovd %r8d, %k1 -; AVX512-NEXT: kshiftlw $15, %k1, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k2} {z} +; AVX512-NEXT: kmovd %r8d, %k2 +; AVX512-NEXT: kandw %k0, %k2, %k0 ; AVX512-NEXT: kmovd %edx, %k2 ; AVX512-NEXT: kshiftlw $15, %k2, %k2 ; AVX512-NEXT: kshiftrw $14, %k2, %k2 ; AVX512-NEXT: korw %k2, %k0, %k0 -; AVX512-NEXT: korw %k0, %k1, %k0 -; AVX512-NEXT: kshiftrw $3, %k0, %k1 -; AVX512-NEXT: kshiftlw $3, %k1, %k1 -; AVX512-NEXT: kshiftlw $14, %k0, %k0 -; AVX512-NEXT: kshiftrw $14, %k0, %k0 -; AVX512-NEXT: kmovd %esi, %k2 -; AVX512-NEXT: kshiftlw $15, %k2, %k2 -; AVX512-NEXT: kshiftrw $13, %k2, %k2 -; AVX512-NEXT: korw %k2, %k1, %k1 -; AVX512-NEXT: korw %k1, %k0, %k0 -; AVX512-NEXT: kshiftrw $4, %k0, %k1 -; AVX512-NEXT: kshiftlw $4, %k1, %k1 -; AVX512-NEXT: kshiftlw $13, %k0, %k0 -; AVX512-NEXT: kshiftrw $13, %k0, %k0 +; AVX512-NEXT: kandw %k1, %k0, %k0 +; AVX512-NEXT: kmovd %esi, 
%k1 +; AVX512-NEXT: kshiftlw $15, %k1, %k1 +; AVX512-NEXT: kshiftrw $13, %k1, %k1 ; AVX512-NEXT: korw %k1, %k0, %k0 +; AVX512-NEXT: movw $-9, %cx +; AVX512-NEXT: kmovd %ecx, %k1 +; AVX512-NEXT: kandw %k1, %k0, %k0 ; AVX512-NEXT: kmovd %eax, %k1 ; AVX512-NEXT: kshiftlw $15, %k1, %k1 ; AVX512-NEXT: kshiftrw $12, %k1, %k1 -; AVX512-NEXT: korw %k0, %k1, %k0 +; AVX512-NEXT: korw %k1, %k0, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: movb %al, (%rdi) ; AVX512-NEXT: popq %rbx