diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1362,6 +1362,29 @@ } } + // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I) + // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits). + if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && + (Op0.getOperand(0).isUndef() || + ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) && + Op0->hasOneUse()) { + unsigned NumSubElts = + Op0.getOperand(1).getValueType().getVectorNumElements(); + unsigned SubIdx = Op0.getConstantOperandVal(2); + APInt DemandedSub = + APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts); + KnownBits KnownSubMask = + TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1); + if (DemandedBits.isSubsetOf(KnownSubMask.One)) { + SDValue NewAnd = + TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1); + SDValue NewInsert = + TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd, + Op0.getOperand(1), Op0.getOperand(2)); + return TLO.CombineTo(Op, NewInsert); + } + } + if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1)) return true; diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll --- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll +++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll @@ -679,13 +679,17 @@ ; KNL-NEXT: pushq %r13 ; KNL-NEXT: pushq %r12 ; KNL-NEXT: pushq %rbx +; KNL-NEXT: xorl %r10d, %r10d +; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp) +; KNL-NEXT: movl $65535, %eax ## imm = 0xFFFF +; KNL-NEXT: movl $0, %r11d +; KNL-NEXT: cmovnel %eax, %r11d +; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp) +; KNL-NEXT: cmovnel %eax, %r10d ; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: movw $-3, %di -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: andl $1, %edi ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 @@ -755,8 +759,8 @@ ; KNL-NEXT: kshiftrw $7, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-513, %di ## imm = 0xFDFF -; KNL-NEXT: kmovw %edi, %k5 -; KNL-NEXT: kandw %k5, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k7 +; KNL-NEXT: kandw %k7, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 @@ -785,83 +789,82 @@ ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $3, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k1 +; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-8193, %di ## imm = 0xDFFF -; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kandw %k0, %k1, %k1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k6 -; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $2, %k6, %k6 -; KNL-NEXT: korw %k6, %k1, %k6 +; KNL-NEXT: kmovw %edi, %k5 +; KNL-NEXT: kshiftlw $15, %k5, %k5 +; KNL-NEXT: kshiftrw $2, %k5, %k5 +; KNL-NEXT: korw %k5, %k0, %k5 ; KNL-NEXT: movw $-16385, %di ## imm = 0xBFFF -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kandw %k1, %k6, %k6 +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: kandw %k0, %k5, %k5 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kshiftlw $1, %k6, %k6 -; KNL-NEXT: kshiftrw $1, %k6, %k6 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $14, %k6, %k6 +; KNL-NEXT: korw %k6, %k5, %k5 +; KNL-NEXT: kshiftlw $1, %k5, %k5 +; KNL-NEXT: kshiftrw $1, %k5, %k5 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: korw %k6, %k5, %k5 +; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: andl $1, %esi +; KNL-NEXT: kmovw %edx, %k5 +; KNL-NEXT: kshiftlw $15, %k5, %k5 +; KNL-NEXT: kshiftrw $14, %k5, %k5 ; KNL-NEXT: kmovw %esi, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k6, %k6 -; KNL-NEXT: kmovw %edx, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k6, %k6 -; KNL-NEXT: kmovw %ecx, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $13, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k6, %k6 -; KNL-NEXT: kmovw %r8d, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $12, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k6, %k6 -; KNL-NEXT: kmovw %r9d, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $11, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k6, %k6 +; KNL-NEXT: korw %k5, %k6, %k5 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; KNL-NEXT: kandw %k6, %k5, %k5 +; KNL-NEXT: kmovw %ecx, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $13, %k6, %k6 +; KNL-NEXT: korw %k6, %k5, %k5 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; KNL-NEXT: kandw %k6, %k5, %k5 +; KNL-NEXT: kmovw %r8d, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $12, %k6, %k6 +; KNL-NEXT: korw %k6, %k5, %k5 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; KNL-NEXT: kandw %k6, %k5, %k5 +; KNL-NEXT: kmovw %r9d, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $11, %k6, %k6 +; KNL-NEXT: korw %k6, %k5, %k5 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; KNL-NEXT: kandw %k6, %k5, %k5 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k6, %k6 +; KNL-NEXT: kmovw %ecx, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $10, %k6, %k6 +; KNL-NEXT: korw %k6, %k5, %k5 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; KNL-NEXT: kandw %k6, %k5, %k5 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k6, %k6 +; KNL-NEXT: kmovw %ecx, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $9, %k6, %k6 +; KNL-NEXT: korw %k6, %k5, %k5 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; KNL-NEXT: kandw %k6, %k5, %k5 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $8, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k6, %k6 +; KNL-NEXT: kmovw %ecx, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $8, %k6, %k6 +; KNL-NEXT: korw %k6, %k5, %k5 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; KNL-NEXT: kandw %k6, %k5, %k5 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $7, %k7, %k7 -; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kandw %k5, %k6, %k5 +; KNL-NEXT: kmovw %ecx, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $7, %k6, %k6 +; KNL-NEXT: korw %k6, %k5, %k5 +; KNL-NEXT: kandw %k7, %k5, %k5 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 @@ -885,34 +888,27 @@ ; KNL-NEXT: kshiftlw $15, %k3, %k3 ; KNL-NEXT: kshiftrw $3, %k3, %k3 ; KNL-NEXT: korw %k3, %k2, %k2 -; KNL-NEXT: kandw %k0, %k2, %k0 +; KNL-NEXT: kandw %k1, %k2, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $2, %k2, %k2 -; KNL-NEXT: korw %k2, %k0, %k0 -; KNL-NEXT: xorl %ecx, %ecx -; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp) -; KNL-NEXT: movl $65535, %edx ## imm = 0xFFFF -; KNL-NEXT: movl $0, %esi -; KNL-NEXT: cmovnel %edx, %esi -; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp) -; KNL-NEXT: cmovnel %edx, %ecx -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dl -; KNL-NEXT: kmovw %edx, %k1 +; KNL-NEXT: korw %k2, %k1, %k1 +; KNL-NEXT: kandw %k0, %k1, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl +; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: kshiftlw $14, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dl -; KNL-NEXT: kmovw %edx, %k1 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl +; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: kmovw %esi, %k1 +; KNL-NEXT: kmovw %r11d, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: kmovw %ecx, %k2 +; KNL-NEXT: kmovw %r10d, %k2 ; KNL-NEXT: kandw %k1, %k2, %k1 ; KNL-NEXT: kmovw %k1, %r8d ; KNL-NEXT: kshiftrw $1, %k0, %k1 @@ -1008,16 +1004,14 @@ ; SKX-NEXT: pushq %r13 ; SKX-NEXT: pushq %r12 ; SKX-NEXT: pushq %rbx -; SKX-NEXT: movq %rdi, %rax ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; SKX-NEXT: movq %rdi, %rax +; SKX-NEXT: kshiftld $31, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: movl $-3, %edi -; SKX-NEXT: kmovd %edi, %k2 -; SKX-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SKX-NEXT: kandd %k2, %k0, %k0 +; SKX-NEXT: kshiftrd $30, %k0, %k0 ; SKX-NEXT: kshiftld $31, %k1, %k1 -; SKX-NEXT: kshiftrd $30, %k1, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: kshiftrd $31, %k1, %k1 +; SKX-NEXT: kord %k0, %k1, %k0 ; SKX-NEXT: movl $-5, %edi ; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill @@ -1034,10 +1028,10 @@ ; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $28, %k1, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: movl $-17, %edi ; SKX-NEXT: kmovd %edi, %k2 ; SKX-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kandd %k2, %k0, %k0 ; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $27, %k1, %k1 @@ -1058,10 +1052,10 @@ ; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $25, %k1, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: movl $-129, %edi ; SKX-NEXT: kmovd %edi, %k2 ; SKX-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kandd %k2, %k0, %k0 ; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $24, %k1, %k1 @@ -1082,10 +1076,11 @@ ; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $22, %k1, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: movl $-1025, %edi ## imm = 0xFBFF -; SKX-NEXT: kmovd %edi, %k6 -; SKX-NEXT: kandd %k6, %k0, %k0 +; SKX-NEXT: kmovd %edi, %k2 +; SKX-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kandd %k2, %k0, %k0 ; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $21, %k1, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 @@ -1098,23 +1093,22 @@ ; SKX-NEXT: kshiftrd $20, %k1, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 ; SKX-NEXT: movl $-4097, %edi ## imm = 0xEFFF -; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kmovd %edi, %k6 +; SKX-NEXT: kandd %k6, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $19, %k1, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: movl $-8193, %edi ## imm = 0xDFFF -; SKX-NEXT: kmovd %edi, %k4 -; SKX-NEXT: kandd %k4, %k0, %k0 +; SKX-NEXT: kmovd %edi, %k5 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kandd %k5, %k0, %k0 ; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $18, %k1, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 ; SKX-NEXT: movl $-16385, %edi ## imm = 0xBFFF -; SKX-NEXT: kmovd %edi, %k5 -; SKX-NEXT: kandd %k5, %k0, %k0 +; SKX-NEXT: kmovd %edi, %k4 +; SKX-NEXT: kandd %k4, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $17, %k1, %k1 @@ -1126,21 +1120,21 @@ ; SKX-NEXT: kshiftld $31, %k7, %k7 ; SKX-NEXT: kshiftrd $16, %k7, %k7 ; SKX-NEXT: kord %k7, %k0, %k7 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 ; SKX-NEXT: movl $-65537, %edi ## imm = 0xFFFEFFFF ; SKX-NEXT: kmovd %edi, %k2 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 ; SKX-NEXT: kandd %k2, %k7, %k7 ; SKX-NEXT: kshiftld $31, %k0, %k0 ; SKX-NEXT: kshiftrd $15, %k0, %k0 ; SKX-NEXT: kord %k0, %k7, %k0 ; SKX-NEXT: kmovd %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SKX-NEXT: kmovd %esi, %k0 -; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kmovd %edx, %k7 +; SKX-NEXT: kmovd %edx, %k0 +; SKX-NEXT: kshiftld $31, %k0, %k0 +; SKX-NEXT: kshiftrd $30, %k0, %k0 +; SKX-NEXT: kmovd %esi, %k7 ; SKX-NEXT: kshiftld $31, %k7, %k7 -; SKX-NEXT: kshiftrd $30, %k7, %k7 -; SKX-NEXT: kord %k7, %k0, %k0 +; SKX-NEXT: kshiftrd $31, %k7, %k7 +; SKX-NEXT: kord %k0, %k7, %k0 ; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload ; SKX-NEXT: kandd %k1, %k0, %k0 ; SKX-NEXT: kmovd %ecx, %k7 @@ -1189,29 +1183,29 @@ ; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $22, %k1, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kandd %k6, %k0, %k0 +; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload +; SKX-NEXT: kandd %k1, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $21, %k1, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 4-byte Reload -; SKX-NEXT: kandd %k6, %k0, %k0 +; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 4-byte Reload +; SKX-NEXT: kandd %k7, %k0, %k0 ; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $20, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 ; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kshiftld $31, %k6, %k1 +; SKX-NEXT: kandd %k6, %k0, %k0 +; SKX-NEXT: kshiftld $31, %k7, %k1 ; SKX-NEXT: kshiftrd $19, %k1, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kandd %k4, %k0, %k0 +; SKX-NEXT: kandd %k5, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $18, %k1, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kandd %k5, %k0, %k0 +; SKX-NEXT: kandd %k4, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftld $31, %k1, %k1 ; SKX-NEXT: kshiftrd $17, %k1, %k1 @@ -1321,243 +1315,239 @@ ; KNL_X32-NEXT: pushl %ebx ; KNL_X32-NEXT: pushl %edi ; KNL_X32-NEXT: pushl %esi -; KNL_X32-NEXT: subl $20, %esp -; KNL_X32-NEXT: movw $-3, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 -; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: subl $16, %esp +; KNL_X32-NEXT: xorl %eax, %eax +; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp) +; KNL_X32-NEXT: movl $65535, %edx ## imm = 0xFFFF +; KNL_X32-NEXT: movl $0, %ecx +; KNL_X32-NEXT: cmovnel %edx, %ecx +; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp) +; KNL_X32-NEXT: cmovnel %edx, %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: andl $1, %edx +; KNL_X32-NEXT: kmovw %edx, %k0 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $14, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-5, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movw $-5, %dx +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $13, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-9, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movw $-9, %dx +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $12, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-17, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movw $-17, %dx +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $11, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-33, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movw $-33, %dx +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $10, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-65, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movw $-65, %dx +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $9, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-129, %ax -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movw $-129, %dx +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $8, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-257, %ax ## imm = 0xFEFF -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movw $-257, %dx ## imm = 0xFEFF +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $7, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-513, %ax ## imm = 0xFDFF -; KNL_X32-NEXT: kmovw %eax, %k5 -; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movw $-513, %dx ## imm = 0xFDFF +; KNL_X32-NEXT: kmovw %edx, %k7 +; KNL_X32-NEXT: kandw %k7, %k0, %k0 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $6, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-1025, %ax ## imm = 0xFBFF -; KNL_X32-NEXT: kmovw %eax, %k4 +; KNL_X32-NEXT: movw $-1025, %dx ## imm = 0xFBFF +; KNL_X32-NEXT: kmovw %edx, %k4 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $5, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-2049, %ax ## imm = 0xF7FF -; KNL_X32-NEXT: kmovw %eax, %k3 +; KNL_X32-NEXT: movw $-2049, %dx ## imm = 0xF7FF +; KNL_X32-NEXT: kmovw %edx, %k3 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $4, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-4097, %ax ## imm = 0xEFFF -; KNL_X32-NEXT: kmovw %eax, %k2 +; KNL_X32-NEXT: movw $-4097, %dx ## imm = 0xEFFF +; KNL_X32-NEXT: kmovw %edx, %k2 ; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $3, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: movw $-8193, %ax ## imm = 0xDFFF -; KNL_X32-NEXT: kmovw %eax, %k1 +; KNL_X32-NEXT: movw $-8193, %dx ## imm = 0xDFFF +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k5 +; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 +; KNL_X32-NEXT: kshiftrw $2, %k5, %k5 +; KNL_X32-NEXT: korw %k5, %k0, %k5 +; KNL_X32-NEXT: movw $-16385, %dx ## imm = 0xBFFF +; KNL_X32-NEXT: kmovw %edx, %k0 +; KNL_X32-NEXT: kandw %k0, %k5, %k5 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k6 +; KNL_X32-NEXT: kshiftlw $14, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k5, %k5 +; KNL_X32-NEXT: kshiftlw $1, %k5, %k5 +; KNL_X32-NEXT: kshiftrw $1, %k5, %k5 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $2, %k6, %k6 -; KNL_X32-NEXT: korw %k6, %k0, %k6 -; KNL_X32-NEXT: movw $-16385, %ax ## imm = 0xBFFF -; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kandw %k0, %k6, %k6 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $14, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kshiftlw $1, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $1, %k6, %k6 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kmovw %k6, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k7, %k6, %k6 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k7, %k6, %k6 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k7, %k6, %k6 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k7, %k6, %k6 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k7, %k6, %k6 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k7, %k6, %k6 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k7, %k6, %k6 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $8, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; KNL_X32-NEXT: kandw %k7, %k6, %k6 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $7, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k6, %k6 -; KNL_X32-NEXT: kandw %k5, %k6, %k5 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: korw %k6, %k5, %k5 +; KNL_X32-NEXT: kmovw %k5, (%esp) ## 2-byte Spill +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: andl $1, %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %bl +; KNL_X32-NEXT: kmovw %ebx, %k5 +; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 +; KNL_X32-NEXT: kshiftrw $14, %k5, %k5 +; KNL_X32-NEXT: kmovw %edx, %k6 +; KNL_X32-NEXT: korw %k5, %k6, %k5 +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload +; KNL_X32-NEXT: kandw %k6, %k5, %k5 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k5, %k5 +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload +; KNL_X32-NEXT: kandw %k6, %k5, %k5 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k5, %k5 +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload +; KNL_X32-NEXT: kandw %k6, %k5, %k5 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k5, %k5 +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload +; KNL_X32-NEXT: kandw %k6, %k5, %k5 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k5, %k5 +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload +; KNL_X32-NEXT: kandw %k6, %k5, %k5 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k5, %k5 +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload +; KNL_X32-NEXT: kandw %k6, %k5, %k5 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $8, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k5, %k5 +; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload +; KNL_X32-NEXT: kandw %k6, %k5, %k5 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $7, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k5, %k5 +; KNL_X32-NEXT: kandw %k7, %k5, %k5 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $6, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k5, %k5 ; KNL_X32-NEXT: kandw %k4, %k5, %k4 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k5 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k5 ; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 ; KNL_X32-NEXT: kshiftrw $5, %k5, %k5 ; KNL_X32-NEXT: korw %k5, %k4, %k4 ; KNL_X32-NEXT: kandw %k3, %k4, %k3 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k4 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k4 ; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 ; KNL_X32-NEXT: kshiftrw $4, %k4, %k4 ; KNL_X32-NEXT: korw %k4, %k3, %k3 ; KNL_X32-NEXT: kandw %k2, %k3, %k2 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k3 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k3 ; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 ; KNL_X32-NEXT: kshiftrw $3, %k3, %k3 ; KNL_X32-NEXT: korw %k3, %k2, %k2 ; KNL_X32-NEXT: kandw %k1, %k2, %k1 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $2, %k2, %k2 ; KNL_X32-NEXT: korw %k2, %k1, %k1 -; KNL_X32-NEXT: xorl %eax, %eax -; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp) -; KNL_X32-NEXT: movl $65535, %ecx ## imm = 0xFFFF -; KNL_X32-NEXT: movl $0, %edx -; KNL_X32-NEXT: cmovnel %ecx, %edx ; KNL_X32-NEXT: kandw %k0, %k1, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %bl -; KNL_X32-NEXT: kmovw %ebx, %k1 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $14, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 ; KNL_X32-NEXT: kshiftlw $1, %k0, %k0 ; KNL_X32-NEXT: kshiftrw $1, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %bl -; KNL_X32-NEXT: kmovw %ebx, %k1 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl +; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 -; KNL_X32-NEXT: kmovw %edx, %k1 -; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp) -; KNL_X32-NEXT: cmovnel %ecx, %eax -; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload +; KNL_X32-NEXT: kmovw %ecx, %k1 +; KNL_X32-NEXT: kmovw (%esp), %k2 ## 2-byte Reload ; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kandw %k1, %k2, %k1 @@ -1640,7 +1630,7 @@ ; KNL_X32-NEXT: orl %ebx, %ecx ; KNL_X32-NEXT: orl %edx, %ecx ; KNL_X32-NEXT: movw %cx, (%eax) -; KNL_X32-NEXT: addl $20, %esp +; KNL_X32-NEXT: addl $16, %esp ; KNL_X32-NEXT: popl %esi ; KNL_X32-NEXT: popl %edi ; KNL_X32-NEXT: popl %ebx @@ -1655,16 +1645,14 @@ ; FASTISEL-NEXT: pushq %r13 ; FASTISEL-NEXT: pushq %r12 ; FASTISEL-NEXT: pushq %rbx -; FASTISEL-NEXT: movq %rdi, %rax ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; FASTISEL-NEXT: movq %rdi, %rax +; FASTISEL-NEXT: kshiftld $31, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: movl $-3, %edi -; FASTISEL-NEXT: kmovd %edi, %k2 -; FASTISEL-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; FASTISEL-NEXT: kandd %k2, %k0, %k0 +; FASTISEL-NEXT: kshiftrd $30, %k0, %k0 ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 -; FASTISEL-NEXT: kshiftrd $30, %k1, %k1 -; FASTISEL-NEXT: kord %k1, %k0, %k0 +; FASTISEL-NEXT: kshiftrd $31, %k1, %k1 +; FASTISEL-NEXT: kord %k0, %k1, %k0 ; FASTISEL-NEXT: movl $-5, %edi ; FASTISEL-NEXT: kmovd %edi, %k1 ; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill @@ -1681,10 +1669,10 @@ ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 ; FASTISEL-NEXT: kshiftrd $28, %k1, %k1 ; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: movl $-17, %edi ; FASTISEL-NEXT: kmovd %edi, %k2 ; FASTISEL-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kandd %k2, %k0, %k0 ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 ; FASTISEL-NEXT: kshiftrd $27, %k1, %k1 @@ -1705,10 +1693,10 @@ ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 ; FASTISEL-NEXT: kshiftrd $25, %k1, %k1 ; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: movl $-129, %edi ; FASTISEL-NEXT: kmovd %edi, %k2 ; FASTISEL-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kandd %k2, %k0, %k0 ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 ; FASTISEL-NEXT: kshiftrd $24, %k1, %k1 @@ -1729,10 +1717,11 @@ ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 ; FASTISEL-NEXT: kshiftrd $22, %k1, %k1 ; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: movl $-1025, %edi ## imm = 0xFBFF -; FASTISEL-NEXT: kmovd %edi, %k6 -; FASTISEL-NEXT: kandd %k6, %k0, %k0 +; FASTISEL-NEXT: kmovd %edi, %k2 +; FASTISEL-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; FASTISEL-NEXT: kandd %k2, %k0, %k0 ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 ; FASTISEL-NEXT: kshiftrd $21, %k1, %k1 ; FASTISEL-NEXT: kord %k1, %k0, %k0 @@ -1745,23 +1734,22 @@ ; FASTISEL-NEXT: kshiftrd $20, %k1, %k1 ; FASTISEL-NEXT: kord %k1, %k0, %k0 ; FASTISEL-NEXT: movl $-4097, %edi ## imm = 0xEFFF -; FASTISEL-NEXT: kmovd %edi, %k1 -; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; FASTISEL-NEXT: kandd %k1, %k0, %k0 +; FASTISEL-NEXT: kmovd %edi, %k6 +; FASTISEL-NEXT: kandd %k6, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 ; FASTISEL-NEXT: kshiftrd $19, %k1, %k1 ; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: movl $-8193, %edi ## imm = 0xDFFF -; FASTISEL-NEXT: kmovd %edi, %k4 -; FASTISEL-NEXT: kandd %k4, %k0, %k0 +; FASTISEL-NEXT: kmovd %edi, %k5 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; FASTISEL-NEXT: kandd %k5, %k0, %k0 ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 ; FASTISEL-NEXT: kshiftrd $18, %k1, %k1 ; FASTISEL-NEXT: kord %k1, %k0, %k0 ; FASTISEL-NEXT: movl $-16385, %edi ## imm = 0xBFFF -; FASTISEL-NEXT: kmovd %edi, %k5 -; FASTISEL-NEXT: kandd %k5, %k0, %k0 +; FASTISEL-NEXT: kmovd %edi, %k4 +; FASTISEL-NEXT: kandd %k4, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 ; FASTISEL-NEXT: kshiftrd $17, %k1, %k1 @@ -1773,21 +1761,21 @@ ; FASTISEL-NEXT: kshiftld $31, %k7, %k7 ; FASTISEL-NEXT: kshiftrd $16, %k7, %k7 ; FASTISEL-NEXT: kord %k7, %k0, %k7 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 ; FASTISEL-NEXT: movl $-65537, %edi ## imm = 0xFFFEFFFF ; FASTISEL-NEXT: kmovd %edi, %k2 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 ; FASTISEL-NEXT: kandd %k2, %k7, %k7 ; FASTISEL-NEXT: kshiftld $31, %k0, %k0 ; FASTISEL-NEXT: kshiftrd $15, %k0, %k0 ; FASTISEL-NEXT: kord %k0, %k7, %k0 ; FASTISEL-NEXT: kmovd %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; FASTISEL-NEXT: kmovd %esi, %k0 -; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kmovd %edx, %k7 +; FASTISEL-NEXT: kmovd %edx, %k0 +; FASTISEL-NEXT: kshiftld $31, %k0, %k0 +; FASTISEL-NEXT: kshiftrd $30, %k0, %k0 +; FASTISEL-NEXT: kmovd %esi, %k7 ; FASTISEL-NEXT: kshiftld $31, %k7, %k7 -; FASTISEL-NEXT: kshiftrd $30, %k7, %k7 -; FASTISEL-NEXT: kord %k7, %k0, %k0 +; FASTISEL-NEXT: kshiftrd $31, %k7, %k7 +; FASTISEL-NEXT: kord %k0, %k7, %k0 ; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload ; FASTISEL-NEXT: kandd %k1, %k0, %k0 ; FASTISEL-NEXT: kmovd %ecx, %k7 @@ -1836,29 +1824,29 @@ ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 ; FASTISEL-NEXT: kshiftrd $22, %k1, %k1 ; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kandd %k6, %k0, %k0 +; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload +; FASTISEL-NEXT: kandd %k1, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 ; FASTISEL-NEXT: kshiftrd $21, %k1, %k1 ; FASTISEL-NEXT: kord %k1, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 4-byte Reload -; FASTISEL-NEXT: kandd %k6, %k0, %k0 +; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 4-byte Reload +; FASTISEL-NEXT: kandd %k7, %k0, %k0 ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 ; FASTISEL-NEXT: kshiftrd $20, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 ; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload -; FASTISEL-NEXT: kandd %k1, %k0, %k0 -; FASTISEL-NEXT: kshiftld $31, %k6, %k1 +; FASTISEL-NEXT: kandd %k6, %k0, %k0 +; FASTISEL-NEXT: kshiftld $31, %k7, %k1 ; FASTISEL-NEXT: kshiftrd $19, %k1, %k1 ; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kandd %k4, %k0, %k0 +; FASTISEL-NEXT: kandd %k5, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 ; FASTISEL-NEXT: kshiftrd $18, %k1, %k1 ; FASTISEL-NEXT: kord %k1, %k0, %k0 -; FASTISEL-NEXT: kandd %k5, %k0, %k0 +; FASTISEL-NEXT: kandd %k4, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 ; FASTISEL-NEXT: kshiftrd $17, %k1, %k1 @@ -1969,11 +1957,9 @@ ; KNL-LABEL: test17: ; KNL: ## %bb.0: ; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: movw $-3, %di -; KNL-NEXT: kmovw %edi, %k2 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: andl $1, %edi ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 @@ -1983,343 +1969,343 @@ ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k2 +; KNL-NEXT: kshiftlw $15, %k2, %k2 +; KNL-NEXT: kshiftrw $13, %k2, %k2 +; KNL-NEXT: korw %k2, %k0, %k0 +; KNL-NEXT: movw $-9, %di +; KNL-NEXT: kmovw %edi, %k2 +; KNL-NEXT: kandw %k2, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k3 ; KNL-NEXT: kshiftlw $15, %k3, %k3 -; KNL-NEXT: kshiftrw $13, %k3, %k3 +; KNL-NEXT: kshiftrw $12, %k3, %k3 ; KNL-NEXT: korw %k3, %k0, %k0 -; KNL-NEXT: movw $-9, %di +; KNL-NEXT: movw $-17, %di ; KNL-NEXT: kmovw %edi, %k3 ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k4 ; KNL-NEXT: kshiftlw $15, %k4, %k4 -; KNL-NEXT: kshiftrw $12, %k4, %k4 +; KNL-NEXT: kshiftrw $11, %k4, %k4 ; KNL-NEXT: korw %k4, %k0, %k0 -; KNL-NEXT: movw $-17, %di +; KNL-NEXT: movw $-33, %di ; KNL-NEXT: kmovw %edi, %k4 ; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k5 ; KNL-NEXT: kshiftlw $15, %k5, %k5 -; KNL-NEXT: kshiftrw $11, %k5, %k5 +; KNL-NEXT: kshiftrw $10, %k5, %k5 ; KNL-NEXT: korw %k5, %k0, %k0 -; KNL-NEXT: movw $-33, %di +; KNL-NEXT: movw $-65, %di ; KNL-NEXT: kmovw %edi, %k5 ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 -; KNL-NEXT: kshiftrw $10, %k6, %k6 +; KNL-NEXT: kshiftrw $9, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: movw $-65, %di -; KNL-NEXT: kmovw %edi, %k6 -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: andl $1, %edi ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $14, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $13, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $13, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $12, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $12, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $11, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $11, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $10, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $9, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; KNL-NEXT: andl $1, %r10d ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kmovw %r10d, %k6 +; KNL-NEXT: korw %k0, %k6, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $13, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $13, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $12, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $12, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $11, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $11, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $10, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $9, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; KNL-NEXT: andl $1, %r10d ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kmovw %r10d, %k6 +; KNL-NEXT: korw %k0, %k6, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $13, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $13, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $12, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $12, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $11, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $11, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $10, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $9, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; KNL-NEXT: andl $1, %r10d ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kmovw %r10d, %k6 +; KNL-NEXT: korw %k0, %k6, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $13, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $13, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $12, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $12, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $11, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $11, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $10, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $9, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; KNL-NEXT: andl $1, %r10d ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kmovw %r10d, %k6 +; KNL-NEXT: korw %k0, %k6, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $13, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $13, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $12, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $12, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $11, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $11, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $10, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $9, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; KNL-NEXT: andl $1, %r10d ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kshiftlw $15, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kmovw %r10d, %k6 +; KNL-NEXT: korw %k0, %k6, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $13, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $13, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $12, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $12, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $11, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $11, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $10, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil -; KNL-NEXT: kmovw %edi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $9, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kmovw %esi, %k0 -; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: kmovw %edx, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: andl $1, %esi +; KNL-NEXT: kmovw %edx, %k0 +; KNL-NEXT: kshiftlw $15, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kmovw %esi, %k7 +; KNL-NEXT: korw %k0, %k7, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %ecx, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $13, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k3, %k0, %k0 +; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: kmovw %r8d, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $12, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k4, %k0, %k0 +; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: kmovw %r9d, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $11, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k5, %k0, %k0 +; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $10, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k6, %k0, %k0 +; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $9, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k7 -; KNL-NEXT: kandw %k2, %k7, %k2 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k7 +; KNL-NEXT: andl $1, %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dl +; KNL-NEXT: kmovw %edx, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k2, %k2 -; KNL-NEXT: kandw %k1, %k2, %k1 +; KNL-NEXT: kmovw %ecx, %k6 +; KNL-NEXT: korw %k7, %k6, %k6 +; KNL-NEXT: kandw %k1, %k6, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl -; KNL-NEXT: kmovw %ecx, %k2 -; KNL-NEXT: kshiftlw $15, %k2, %k2 -; KNL-NEXT: kshiftrw $13, %k2, %k2 -; KNL-NEXT: korw %k2, %k1, %k1 -; KNL-NEXT: kandw %k3, %k1, %k1 +; KNL-NEXT: kmovw %ecx, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $13, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 +; KNL-NEXT: kandw %k2, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $12, %k2, %k2 ; KNL-NEXT: korw %k2, %k1, %k1 -; KNL-NEXT: kandw %k4, %k1, %k1 +; KNL-NEXT: kandw %k3, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $11, %k2, %k2 ; KNL-NEXT: korw %k2, %k1, %k1 -; KNL-NEXT: kandw %k5, %k1, %k1 +; KNL-NEXT: kandw %k4, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $10, %k2, %k2 ; KNL-NEXT: korw %k2, %k1, %k1 -; KNL-NEXT: kandw %k6, %k1, %k1 +; KNL-NEXT: kandw %k5, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 @@ -2378,84 +2364,83 @@ ; SKX-LABEL: test17: ; SKX: ## %bb.0: ; SKX-NEXT: movq %rdi, %rax -; SKX-NEXT: movb $-3, %dil +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k0, %k0 +; SKX-NEXT: kshiftrb $6, %k0, %k0 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $7, %k1, %k1 +; SKX-NEXT: korb %k0, %k1, %k0 +; SKX-NEXT: movb $-5, %dil ; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 ; SKX-NEXT: kandb %k1, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $6, %k1, %k1 +; SKX-NEXT: kshiftrb $5, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: movb $-5, %dil +; SKX-NEXT: movb $-9, %dil ; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; SKX-NEXT: kandb %k1, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 ; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $5, %k2, %k2 +; SKX-NEXT: kshiftrb $4, %k2, %k2 ; SKX-NEXT: korb %k2, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; SKX-NEXT: movb $-9, %dil -; SKX-NEXT: kmovd %edi, %k7 -; SKX-NEXT: kandb %k7, %k0, %k0 -; SKX-NEXT: kshiftlb $7, %k3, %k3 -; SKX-NEXT: kshiftrb $4, %k3, %k3 -; SKX-NEXT: korb %k3, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 ; SKX-NEXT: movb $-17, %dil +; SKX-NEXT: kmovd %edi, %k3 +; SKX-NEXT: kandb %k3, %k0, %k0 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $3, %k2, %k2 +; SKX-NEXT: korb %k2, %k0, %k0 +; SKX-NEXT: movb $-33, %dil ; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; SKX-NEXT: kandb %k1, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 ; SKX-NEXT: kshiftlb $7, %k4, %k4 -; SKX-NEXT: kshiftrb $3, %k4, %k4 +; SKX-NEXT: kshiftrb $2, %k4, %k4 ; SKX-NEXT: korb %k4, %k0, %k0 -; SKX-NEXT: movb $-33, %dil -; SKX-NEXT: kmovd %edi, %k4 -; SKX-NEXT: kandb %k4, %k0, %k0 +; SKX-NEXT: movb $-65, %dil +; SKX-NEXT: kmovd %edi, %k6 +; SKX-NEXT: kandb %k6, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 ; SKX-NEXT: kshiftlb $7, %k5, %k5 -; SKX-NEXT: kshiftrb $2, %k5, %k5 +; SKX-NEXT: kshiftrb $1, %k5, %k5 ; SKX-NEXT: korb %k5, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 -; SKX-NEXT: movb $-65, %dil -; SKX-NEXT: kmovd %edi, %k5 -; SKX-NEXT: kandb %k5, %k0, %k1 -; SKX-NEXT: kshiftlb $7, %k6, %k6 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; SKX-NEXT: kshiftrb $1, %k6, %k6 -; SKX-NEXT: korb %k6, %k1, %k1 -; SKX-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; SKX-NEXT: kandb %k6, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $6, %k1, %k1 -; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; SKX-NEXT: kandb %k3, %k0, %k2 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $5, %k1, %k1 +; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; SKX-NEXT: korb %k1, %k2, %k1 -; SKX-NEXT: kandb %k7, %k1, %k1 ; SKX-NEXT: kshiftlb $7, %k0, %k0 -; SKX-NEXT: kshiftrb $4, %k0, %k0 -; SKX-NEXT: korb %k0, %k1, %k0 +; SKX-NEXT: kshiftrb $6, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 +; SKX-NEXT: kshiftlb $7, %k7, %k7 +; SKX-NEXT: kshiftrb $7, %k7, %k7 +; SKX-NEXT: korb %k0, %k7, %k0 ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; SKX-NEXT: kandb %k2, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $3, %k1, %k1 -; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 +; SKX-NEXT: kshiftlb $7, %k7, %k7 +; SKX-NEXT: kshiftrb $5, %k7, %k7 +; SKX-NEXT: korb %k7, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; SKX-NEXT: kandb %k5, %k0, %k1 +; SKX-NEXT: kshiftlb $7, %k7, %k7 +; SKX-NEXT: kshiftrb $4, %k7, %k7 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; SKX-NEXT: korb %k7, %k1, %k1 +; SKX-NEXT: kandb %k3, %k1, %k1 +; SKX-NEXT: kshiftlb $7, %k0, %k0 +; SKX-NEXT: kshiftrb $3, %k0, %k0 +; SKX-NEXT: korb %k0, %k1, %k0 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload ; SKX-NEXT: kandb %k4, %k0, %k0 -; SKX-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $2, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: kandb %k5, %k0, %k0 +; SKX-NEXT: kandb %k6, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $1, %k1, %k1 @@ -2464,128 +2449,128 @@ ; SKX-NEXT: kandb %k1, %k0, %k0 ; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; SKX-NEXT: kandb %k6, %k0, %k0 +; SKX-NEXT: kshiftlb $7, %k0, %k0 +; SKX-NEXT: kshiftrb $6, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $6, %k1, %k1 -; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: kandb %k3, %k0, %k0 +; SKX-NEXT: kshiftrb $7, %k1, %k1 +; SKX-NEXT: korb %k0, %k1, %k0 +; SKX-NEXT: kandb %k2, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $5, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: kandb %k7, %k0, %k0 +; SKX-NEXT: kmovq %k5, %k7 +; SKX-NEXT: kandb %k5, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $4, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kmovq %k2, %k3 -; SKX-NEXT: kandb %k2, %k0, %k0 +; SKX-NEXT: kandb %k3, %k0, %k0 +; SKX-NEXT: kmovw %k3, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $3, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 ; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: kandb %k4, %k0, %k0 -; SKX-NEXT: kshiftlb $7, %k2, %k1 +; SKX-NEXT: kshiftlb $7, %k5, %k1 ; SKX-NEXT: kshiftrb $2, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: kandb %k5, %k0, %k0 +; SKX-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; SKX-NEXT: kandb %k6, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $1, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kmovq %k6, %k0 -; SKX-NEXT: kandb %k6, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $6, %k2, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload -; SKX-NEXT: kandb %k4, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k6, %k2 -; SKX-NEXT: kshiftrb $5, %k2, %k2 -; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $6, %k1, %k1 +; SKX-NEXT: kshiftlb $7, %k5, %k5 +; SKX-NEXT: kshiftrb $7, %k5, %k5 +; SKX-NEXT: korb %k1, %k5, %k1 +; SKX-NEXT: kandb %k2, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 +; SKX-NEXT: kshiftlb $7, %k5, %k5 +; SKX-NEXT: kshiftrb $5, %k5, %k5 +; SKX-NEXT: korb %k5, %k1, %k1 ; SKX-NEXT: kandb %k7, %k1, %k1 -; SKX-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $4, %k2, %k2 -; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 +; SKX-NEXT: kshiftlb $7, %k5, %k5 +; SKX-NEXT: kshiftrb $4, %k5, %k5 +; SKX-NEXT: korb %k5, %k1, %k1 ; SKX-NEXT: kandb %k3, %k1, %k1 -; SKX-NEXT: kmovq %k3, %k6 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $3, %k2, %k2 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; SKX-NEXT: kandb %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $2, %k2, %k2 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kandb %k5, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $1, %k2, %k2 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandb %k0, %k1, %k1 -; SKX-NEXT: kmovq %k0, %k3 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $6, %k2, %k2 -; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 +; SKX-NEXT: kshiftlb $7, %k5, %k5 +; SKX-NEXT: kshiftrb $3, %k5, %k5 +; SKX-NEXT: korb %k5, %k1, %k1 ; SKX-NEXT: kandb %k4, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $5, %k2, %k2 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kandb %k7, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $4, %k2, %k2 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 +; SKX-NEXT: kshiftlb $7, %k5, %k5 +; SKX-NEXT: kshiftrb $2, %k5, %k5 +; SKX-NEXT: korb %k5, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 ; SKX-NEXT: kandb %k6, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $3, %k2, %k2 +; SKX-NEXT: kshiftlb $7, %k5, %k5 +; SKX-NEXT: kshiftrb $1, %k5, %k5 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 +; SKX-NEXT: korb %k5, %k1, %k5 +; SKX-NEXT: kshiftlb $7, %k7, %k1 +; SKX-NEXT: kshiftrb $6, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 +; SKX-NEXT: kshiftlb $7, %k7, %k7 +; SKX-NEXT: kshiftrb $7, %k7, %k7 +; SKX-NEXT: korb %k1, %k7, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 +; SKX-NEXT: kandb %k2, %k1, %k1 +; SKX-NEXT: kshiftlb $7, %k7, %k7 +; SKX-NEXT: kshiftrb $5, %k7, %k7 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; SKX-NEXT: kandb %k7, %k1, %k1 +; SKX-NEXT: korb %k7, %k1, %k1 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload +; SKX-NEXT: kandb %k3, %k1, %k1 ; SKX-NEXT: kshiftlb $7, %k0, %k0 -; SKX-NEXT: kshiftrb $2, %k0, %k0 +; SKX-NEXT: kshiftrb $4, %k0, %k0 ; SKX-NEXT: korb %k0, %k1, %k0 -; SKX-NEXT: kandb %k5, %k0, %k0 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; SKX-NEXT: kandb %k6, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $1, %k1, %k1 +; SKX-NEXT: kshiftrb $3, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; SKX-NEXT: kandb %k1, %k0, %k0 -; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; SKX-NEXT: kandb %k1, %k0, %k0 -; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; SKX-NEXT: kandb %k3, %k0, %k0 +; SKX-NEXT: kandb %k4, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $6, %k1, %k1 +; SKX-NEXT: kshiftrb $2, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: kandb %k4, %k0, %k0 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; SKX-NEXT: kandb %k2, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $5, %k1, %k1 +; SKX-NEXT: kshiftrb $1, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload -; SKX-NEXT: kandb %k4, %k0, %k0 +; SKX-NEXT: kandb %k5, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; SKX-NEXT: kandb %k5, %k0, %k0 +; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; SKX-NEXT: kshiftlb $7, %k1, %k0 +; SKX-NEXT: kshiftrb $6, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $7, %k1, %k1 +; SKX-NEXT: korb %k0, %k1, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; SKX-NEXT: kandb %k5, %k0, %k0 ; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $5, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kandb %k3, %k0, %k0 +; SKX-NEXT: kshiftlb $7, %k7, %k1 ; SKX-NEXT: kshiftrb $4, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: kandb %k6, %k0, %k0 @@ -2593,60 +2578,28 @@ ; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $3, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kandb %k4, %k0, %k0 +; SKX-NEXT: kmovq %k4, %k7 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kandb %k7, %k0, %k0 ; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 ; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; SKX-NEXT: kandb %k5, %k0, %k0 -; SKX-NEXT: kshiftlb $7, %k2, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; SKX-NEXT: kandb %k2, %k0, %k0 +; SKX-NEXT: kmovq %k2, %k3 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $1, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: kandb %k3, %k2, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $6, %k2, %k2 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; SKX-NEXT: kandb %k6, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $5, %k2, %k2 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kandb %k4, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k6, %k2 -; SKX-NEXT: kshiftrb $4, %k2, %k2 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; SKX-NEXT: kandb %k6, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $3, %k2, %k2 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kandb %k7, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $2, %k2, %k2 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kandb %k5, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $1, %k2, %k2 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kandb %k0, %k1, %k0 ; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; SKX-NEXT: kandb %k3, %k0, %k0 +; SKX-NEXT: kshiftlb $7, %k0, %k0 +; SKX-NEXT: kshiftrb $6, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $6, %k1, %k1 -; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; SKX-NEXT: kandb %k3, %k0, %k0 +; SKX-NEXT: kshiftrb $7, %k1, %k1 +; SKX-NEXT: korb %k0, %k1, %k0 +; SKX-NEXT: kmovq %k5, %k4 +; SKX-NEXT: kandb %k5, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $5, %k1, %k1 @@ -2658,34 +2611,73 @@ ; SKX-NEXT: kshiftrb $4, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kandb %k6, %k0, %k2 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $3, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; SKX-NEXT: korb %k1, %k2, %k1 +; SKX-NEXT: kmovq %k7, %k2 +; SKX-NEXT: kandb %k7, %k1, %k1 +; SKX-NEXT: kshiftlb $7, %k0, %k0 +; SKX-NEXT: kshiftrb $2, %k0, %k0 +; SKX-NEXT: korb %k0, %k1, %k0 +; SKX-NEXT: kandb %k3, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $1, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; SKX-NEXT: kandb %k1, %k0, %k0 +; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 +; SKX-NEXT: kshiftlb $7, %k0, %k0 +; SKX-NEXT: kshiftrb $6, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $7, %k1, %k1 +; SKX-NEXT: korb %k0, %k1, %k0 +; SKX-NEXT: kandb %k4, %k0, %k0 +; SKX-NEXT: kmovq %k4, %k7 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $5, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovq %k5, %k3 +; SKX-NEXT: kandb %k5, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $4, %k1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: kandb %k6, %k0, %k0 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $3, %k1, %k1 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 ; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: kandb %k7, %k0, %k0 -; SKX-NEXT: kshiftlb $7, %k2, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; SKX-NEXT: kandb %k2, %k0, %k0 +; SKX-NEXT: kmovq %k2, %k5 +; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $2, %k1, %k1 +; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 ; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload ; SKX-NEXT: kandb %k4, %k0, %k0 -; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftlb $7, %k2, %k1 ; SKX-NEXT: kshiftrb $1, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: kmovd %esi, %k1 -; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; SKX-NEXT: kandb %k2, %k1, %k1 -; SKX-NEXT: kmovd %edx, %k2 +; SKX-NEXT: kmovd %edx, %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $6, %k1, %k1 +; SKX-NEXT: kmovd %esi, %k2 ; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $6, %k2, %k2 -; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kandb %k3, %k1, %k1 +; SKX-NEXT: kshiftrb $7, %k2, %k2 +; SKX-NEXT: korb %k1, %k2, %k1 +; SKX-NEXT: kandb %k7, %k1, %k1 ; SKX-NEXT: kmovd %ecx, %k2 ; SKX-NEXT: kshiftlb $7, %k2, %k2 ; SKX-NEXT: kshiftrb $5, %k2, %k2 ; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kandb %k5, %k1, %k1 +; SKX-NEXT: kandb %k3, %k1, %k1 ; SKX-NEXT: kmovd %r8d, %k2 ; SKX-NEXT: kshiftlb $7, %k2, %k2 ; SKX-NEXT: kshiftrb $4, %k2, %k2 @@ -2696,7 +2688,7 @@ ; SKX-NEXT: kshiftrb $3, %k2, %k2 ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 ; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kandb %k7, %k1, %k1 +; SKX-NEXT: kandb %k5, %k1, %k1 ; SKX-NEXT: kshiftlb $7, %k3, %k2 ; SKX-NEXT: kshiftrb $2, %k2, %k2 ; SKX-NEXT: korb %k2, %k1, %k1 @@ -2751,11 +2743,9 @@ ; KNL_X32: ## %bb.0: ; KNL_X32-NEXT: pushl %ebx ; KNL_X32-NEXT: subl $16, %esp -; KNL_X32-NEXT: movw $-3, %ax -; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_X32-NEXT: andl $1, %eax ; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 @@ -2765,348 +2755,348 @@ ; KNL_X32-NEXT: kmovw %eax, %k1 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_X32-NEXT: kmovw %eax, %k2 +; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 +; KNL_X32-NEXT: kshiftrw $13, %k2, %k2 +; KNL_X32-NEXT: korw %k2, %k0, %k0 +; KNL_X32-NEXT: movw $-9, %ax +; KNL_X32-NEXT: kmovw %eax, %k2 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k3 ; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 -; KNL_X32-NEXT: kshiftrw $13, %k3, %k3 +; KNL_X32-NEXT: kshiftrw $12, %k3, %k3 ; KNL_X32-NEXT: korw %k3, %k0, %k0 -; KNL_X32-NEXT: movw $-9, %ax +; KNL_X32-NEXT: movw $-17, %ax ; KNL_X32-NEXT: kmovw %eax, %k3 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k4 ; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 -; KNL_X32-NEXT: kshiftrw $12, %k4, %k4 +; KNL_X32-NEXT: kshiftrw $11, %k4, %k4 ; KNL_X32-NEXT: korw %k4, %k0, %k0 -; KNL_X32-NEXT: movw $-17, %ax +; KNL_X32-NEXT: movw $-33, %ax ; KNL_X32-NEXT: kmovw %eax, %k4 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k5 ; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 -; KNL_X32-NEXT: kshiftrw $11, %k5, %k5 +; KNL_X32-NEXT: kshiftrw $10, %k5, %k5 ; KNL_X32-NEXT: korw %k5, %k0, %k0 -; KNL_X32-NEXT: movw $-33, %ax +; KNL_X32-NEXT: movw $-65, %ax ; KNL_X32-NEXT: kmovw %eax, %k5 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 -; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 -; KNL_X32-NEXT: movw $-65, %ax -; KNL_X32-NEXT: kmovw %eax, %k6 -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_X32-NEXT: andl $1, %eax ; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $14, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 -; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 +; KNL_X32-NEXT: kandw %k3, %k0, %k0 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: andl $1, %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; KNL_X32-NEXT: kmovw %ecx, %k0 +; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 +; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: korw %k0, %k6, %k0 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: andl $1, %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; KNL_X32-NEXT: kmovw %ecx, %k0 +; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 +; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: korw %k0, %k6, %k0 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: andl $1, %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; KNL_X32-NEXT: kmovw %ecx, %k0 +; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 +; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: korw %k0, %k6, %k0 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: andl $1, %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; KNL_X32-NEXT: kmovw %ecx, %k0 +; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 +; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: korw %k0, %k6, %k0 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: andl $1, %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; KNL_X32-NEXT: kmovw %ecx, %k0 +; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 +; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: korw %k0, %k6, %k0 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 -; KNL_X32-NEXT: kandw %k6, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al +; KNL_X32-NEXT: andl $1, %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; KNL_X32-NEXT: kmovw %ecx, %k0 +; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 +; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 ; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 -; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k0, %k0 +; KNL_X32-NEXT: korw %k0, %k7, %k0 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 ; KNL_X32-NEXT: korw %k7, %k0, %k0 -; KNL_X32-NEXT: kandw %k3, %k0, %k0 +; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 ; KNL_X32-NEXT: korw %k7, %k0, %k0 -; KNL_X32-NEXT: kandw %k4, %k0, %k0 +; KNL_X32-NEXT: kandw %k3, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 ; KNL_X32-NEXT: korw %k7, %k0, %k0 -; KNL_X32-NEXT: kandw %k5, %k0, %k0 +; KNL_X32-NEXT: kandw %k4, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 ; KNL_X32-NEXT: korw %k7, %k0, %k0 -; KNL_X32-NEXT: kandw %k6, %k0, %k0 +; KNL_X32-NEXT: kandw %k5, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 ; KNL_X32-NEXT: korw %k7, %k0, %k0 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 -; KNL_X32-NEXT: kandw %k2, %k7, %k2 -; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k7 +; KNL_X32-NEXT: andl $1, %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; KNL_X32-NEXT: kmovw %ecx, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 -; KNL_X32-NEXT: korw %k7, %k2, %k2 -; KNL_X32-NEXT: kandw %k1, %k2, %k1 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: korw %k7, %k6, %k6 +; KNL_X32-NEXT: kandw %k1, %k6, %k1 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al -; KNL_X32-NEXT: kmovw %eax, %k2 -; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 -; KNL_X32-NEXT: kshiftrw $13, %k2, %k2 -; KNL_X32-NEXT: korw %k2, %k1, %k1 -; KNL_X32-NEXT: kandw %k3, %k1, %k1 +; KNL_X32-NEXT: kmovw %eax, %k6 +; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 +; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 +; KNL_X32-NEXT: korw %k6, %k1, %k1 +; KNL_X32-NEXT: kandw %k2, %k1, %k1 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $12, %k2, %k2 ; KNL_X32-NEXT: korw %k2, %k1, %k1 -; KNL_X32-NEXT: kandw %k4, %k1, %k1 +; KNL_X32-NEXT: kandw %k3, %k1, %k1 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $11, %k2, %k2 ; KNL_X32-NEXT: korw %k2, %k1, %k1 -; KNL_X32-NEXT: kandw %k5, %k1, %k1 +; KNL_X32-NEXT: kandw %k4, %k1, %k1 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $10, %k2, %k2 ; KNL_X32-NEXT: korw %k2, %k1, %k1 -; KNL_X32-NEXT: kandw %k6, %k1, %k1 +; KNL_X32-NEXT: kandw %k5, %k1, %k1 ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 @@ -3168,123 +3158,120 @@ ; FASTISEL-LABEL: test17: ; FASTISEL: ## %bb.0: ; FASTISEL-NEXT: movq %rdi, %rax -; FASTISEL-NEXT: movb $-3, %dil -; FASTISEL-NEXT: kmovd %edi, %k1 -; FASTISEL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; FASTISEL-NEXT: kandb %k1, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; FASTISEL-NEXT: kshiftlb $7, %k0, %k0 +; FASTISEL-NEXT: kshiftrb $6, %k0, %k0 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 -; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 -; FASTISEL-NEXT: korb %k1, %k0, %k0 +; FASTISEL-NEXT: kshiftrb $7, %k1, %k1 +; FASTISEL-NEXT: korb %k0, %k1, %k0 ; FASTISEL-NEXT: movb $-5, %dil +; FASTISEL-NEXT: kmovd %edi, %k3 +; FASTISEL-NEXT: kandb %k3, %k0, %k0 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 +; FASTISEL-NEXT: kshiftrb $5, %k1, %k1 +; FASTISEL-NEXT: korb %k1, %k0, %k0 +; FASTISEL-NEXT: movb $-9, %dil ; FASTISEL-NEXT: kmovd %edi, %k1 ; FASTISEL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; FASTISEL-NEXT: kandb %k1, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 -; FASTISEL-NEXT: kshiftrb $5, %k2, %k2 +; FASTISEL-NEXT: kshiftrb $4, %k2, %k2 ; FASTISEL-NEXT: korb %k2, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 -; FASTISEL-NEXT: movb $-9, %dil -; FASTISEL-NEXT: kmovd %edi, %k1 -; FASTISEL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; FASTISEL-NEXT: kandb %k1, %k0, %k0 -; FASTISEL-NEXT: kshiftlb $7, %k3, %k3 -; FASTISEL-NEXT: kshiftrb $4, %k3, %k3 -; FASTISEL-NEXT: korb %k3, %k0, %k0 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 ; FASTISEL-NEXT: movb $-17, %dil ; FASTISEL-NEXT: kmovd %edi, %k1 ; FASTISEL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; FASTISEL-NEXT: kandb %k1, %k0, %k0 +; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 +; FASTISEL-NEXT: kshiftrb $3, %k2, %k2 +; FASTISEL-NEXT: korb %k2, %k0, %k0 +; FASTISEL-NEXT: movb $-33, %dil +; FASTISEL-NEXT: kmovd %edi, %k5 +; FASTISEL-NEXT: kandb %k5, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 ; FASTISEL-NEXT: kshiftlb $7, %k4, %k4 -; FASTISEL-NEXT: kshiftrb $3, %k4, %k4 +; FASTISEL-NEXT: kshiftrb $2, %k4, %k4 ; FASTISEL-NEXT: korb %k4, %k0, %k0 -; FASTISEL-NEXT: movb $-33, %dil +; FASTISEL-NEXT: movb $-65, %dil ; FASTISEL-NEXT: kmovd %edi, %k1 ; FASTISEL-NEXT: kandb %k1, %k0, %k0 ; FASTISEL-NEXT: kmovq %k1, %k4 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 -; FASTISEL-NEXT: kshiftlb $7, %k5, %k5 -; FASTISEL-NEXT: kshiftrb $2, %k5, %k5 -; FASTISEL-NEXT: korb %k5, %k0, %k0 +; FASTISEL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 +; FASTISEL-NEXT: kshiftlb $7, %k6, %k6 +; FASTISEL-NEXT: kshiftrb $1, %k6, %k6 +; FASTISEL-NEXT: korb %k6, %k0, %k1 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 +; FASTISEL-NEXT: kshiftlb $7, %k6, %k6 +; FASTISEL-NEXT: kshiftrb $6, %k6, %k6 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 -; FASTISEL-NEXT: movb $-65, %dil -; FASTISEL-NEXT: kmovd %edi, %k6 -; FASTISEL-NEXT: kandb %k6, %k0, %k1 ; FASTISEL-NEXT: kshiftlb $7, %k7, %k7 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; FASTISEL-NEXT: kshiftrb $1, %k7, %k7 -; FASTISEL-NEXT: korb %k7, %k1, %k7 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k3, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 -; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 -; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; FASTISEL-NEXT: kshiftrb $7, %k7, %k7 +; FASTISEL-NEXT: korb %k6, %k7, %k6 +; FASTISEL-NEXT: kandb %k3, %k6, %k6 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 +; FASTISEL-NEXT: kshiftlb $7, %k7, %k7 +; FASTISEL-NEXT: kshiftrb $5, %k7, %k7 +; FASTISEL-NEXT: korb %k7, %k6, %k6 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k2, %k0, %k2 -; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 -; FASTISEL-NEXT: kshiftrb $5, %k1, %k1 +; FASTISEL-NEXT: kandb %k2, %k6, %k6 +; FASTISEL-NEXT: kshiftlb $7, %k7, %k7 +; FASTISEL-NEXT: kshiftrb $4, %k7, %k7 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; FASTISEL-NEXT: korb %k1, %k2, %k1 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k5, %k1, %k1 +; FASTISEL-NEXT: korb %k7, %k6, %k6 +; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; FASTISEL-NEXT: kandb %k7, %k6, %k6 ; FASTISEL-NEXT: kshiftlb $7, %k0, %k0 -; FASTISEL-NEXT: kshiftrb $4, %k0, %k0 -; FASTISEL-NEXT: korb %k0, %k1, %k0 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k2, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 -; FASTISEL-NEXT: kshiftrb $3, %k1, %k1 -; FASTISEL-NEXT: korb %k1, %k0, %k0 +; FASTISEL-NEXT: kshiftrb $3, %k0, %k0 +; FASTISEL-NEXT: korb %k0, %k6, %k0 +; FASTISEL-NEXT: kandb %k5, %k0, %k0 +; FASTISEL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 +; FASTISEL-NEXT: kshiftlb $7, %k6, %k6 +; FASTISEL-NEXT: kshiftrb $2, %k6, %k6 +; FASTISEL-NEXT: korb %k6, %k0, %k0 ; FASTISEL-NEXT: kandb %k4, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 -; FASTISEL-NEXT: kshiftrb $2, %k1, %k1 -; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kandb %k6, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 -; FASTISEL-NEXT: kshiftrb $1, %k1, %k1 -; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kandb %k7, %k0, %k0 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 +; FASTISEL-NEXT: kshiftlb $7, %k6, %k6 +; FASTISEL-NEXT: kshiftrb $1, %k6, %k6 +; FASTISEL-NEXT: korb %k6, %k0, %k0 +; FASTISEL-NEXT: kandb %k1, %k0, %k0 ; FASTISEL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; FASTISEL-NEXT: kandb %k3, %k0, %k0 +; FASTISEL-NEXT: kshiftlb $7, %k0, %k0 +; FASTISEL-NEXT: kshiftrb $6, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 -; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 -; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k7, %k0, %k0 +; FASTISEL-NEXT: kshiftrb $7, %k1, %k1 +; FASTISEL-NEXT: korb %k0, %k1, %k0 +; FASTISEL-NEXT: kmovq %k3, %k7 +; FASTISEL-NEXT: kandb %k3, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $5, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kandb %k5, %k0, %k0 +; FASTISEL-NEXT: kandb %k2, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $4, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kmovq %k2, %k3 -; FASTISEL-NEXT: kandb %k2, %k0, %k0 +; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload +; FASTISEL-NEXT: kandb %k4, %k0, %k0 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $3, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kandb %k4, %k0, %k0 -; FASTISEL-NEXT: kmovq %k4, %k5 -; FASTISEL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; FASTISEL-NEXT: kshiftlb $7, %k2, %k1 +; FASTISEL-NEXT: kandb %k5, %k0, %k0 +; FASTISEL-NEXT: kshiftlb $7, %k6, %k1 ; FASTISEL-NEXT: kshiftrb $2, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kmovq %k6, %k4 -; FASTISEL-NEXT: kandb %k6, %k0, %k0 +; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload +; FASTISEL-NEXT: kandb %k3, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $1, %k1, %k1 @@ -3292,141 +3279,141 @@ ; FASTISEL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k6, %k0, %k0 +; FASTISEL-NEXT: kshiftlb $7, %k0, %k0 +; FASTISEL-NEXT: kshiftrb $6, %k0, %k0 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 -; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; FASTISEL-NEXT: korb %k1, %k0, %k0 +; FASTISEL-NEXT: kshiftrb $7, %k1, %k1 +; FASTISEL-NEXT: korb %k0, %k1, %k0 ; FASTISEL-NEXT: kandb %k7, %k0, %k0 -; FASTISEL-NEXT: kshiftlb $7, %k2, %k1 +; FASTISEL-NEXT: kmovq %k7, %k5 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $5, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k7, %k0, %k0 +; FASTISEL-NEXT: kandb %k2, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $4, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kandb %k3, %k0, %k0 +; FASTISEL-NEXT: kandb %k4, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $3, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kandb %k5, %k0, %k0 +; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; FASTISEL-NEXT: kandb %k6, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $2, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kandb %k4, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; FASTISEL-NEXT: kandb %k3, %k0, %k0 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $1, %k1, %k1 -; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandb %k6, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 -; FASTISEL-NEXT: kshiftrb $6, %k2, %k2 -; FASTISEL-NEXT: korb %k2, %k1, %k1 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 +; FASTISEL-NEXT: korb %k1, %k0, %k2 +; FASTISEL-NEXT: kshiftlb $7, %k7, %k1 +; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 +; FASTISEL-NEXT: kshiftlb $7, %k7, %k7 +; FASTISEL-NEXT: kshiftrb $7, %k7, %k7 +; FASTISEL-NEXT: korb %k1, %k7, %k1 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 ; FASTISEL-NEXT: kandb %k5, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 -; FASTISEL-NEXT: kshiftrb $5, %k2, %k2 -; FASTISEL-NEXT: korb %k2, %k1, %k1 -; FASTISEL-NEXT: kandb %k7, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 -; FASTISEL-NEXT: kshiftrb $4, %k2, %k2 -; FASTISEL-NEXT: korb %k2, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; FASTISEL-NEXT: kandb %k3, %k1, %k1 -; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 -; FASTISEL-NEXT: kshiftrb $3, %k2, %k2 +; FASTISEL-NEXT: kshiftlb $7, %k7, %k7 +; FASTISEL-NEXT: kshiftrb $5, %k7, %k7 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; FASTISEL-NEXT: korb %k2, %k1, %k1 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k2, %k1, %k1 +; FASTISEL-NEXT: korb %k7, %k1, %k1 +; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; FASTISEL-NEXT: kandb %k7, %k1, %k1 ; FASTISEL-NEXT: kshiftlb $7, %k0, %k0 -; FASTISEL-NEXT: kshiftrb $2, %k0, %k0 +; FASTISEL-NEXT: kshiftrb $4, %k0, %k0 ; FASTISEL-NEXT: korb %k0, %k1, %k0 -; FASTISEL-NEXT: kmovq %k4, %k3 -; FASTISEL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; FASTISEL-NEXT: kandb %k4, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 -; FASTISEL-NEXT: kshiftrb $1, %k1, %k1 +; FASTISEL-NEXT: kshiftrb $3, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k1, %k0, %k0 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k1, %k0, %k0 -; FASTISEL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; FASTISEL-NEXT: kmovq %k6, %k4 ; FASTISEL-NEXT: kandb %k6, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 -; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 +; FASTISEL-NEXT: kshiftrb $2, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kandb %k5, %k0, %k0 +; FASTISEL-NEXT: kandb %k3, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 -; FASTISEL-NEXT: kshiftrb $5, %k1, %k1 +; FASTISEL-NEXT: kshiftrb $1, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kandb %k7, %k0, %k0 +; FASTISEL-NEXT: kandb %k2, %k0, %k0 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; FASTISEL-NEXT: kandb %k2, %k0, %k0 +; FASTISEL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; FASTISEL-NEXT: kshiftlb $7, %k1, %k0 +; FASTISEL-NEXT: kshiftrb $6, %k0, %k0 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 +; FASTISEL-NEXT: kshiftrb $7, %k1, %k1 +; FASTISEL-NEXT: korb %k0, %k1, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; FASTISEL-NEXT: kmovq %k5, %k3 +; FASTISEL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; FASTISEL-NEXT: kandb %k5, %k0, %k0 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 +; FASTISEL-NEXT: kshiftrb $5, %k1, %k1 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; FASTISEL-NEXT: korb %k1, %k0, %k0 +; FASTISEL-NEXT: kandb %k7, %k0, %k0 +; FASTISEL-NEXT: kshiftlb $7, %k2, %k1 ; FASTISEL-NEXT: kshiftrb $4, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k6, %k0, %k0 +; FASTISEL-NEXT: kmovq %k4, %k5 +; FASTISEL-NEXT: kandb %k4, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $3, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 +; FASTISEL-NEXT: kandb %k6, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 -; FASTISEL-NEXT: kandb %k2, %k0, %k0 -; FASTISEL-NEXT: kmovq %k2, %k7 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $2, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kandb %k3, %k0, %k0 -; FASTISEL-NEXT: kshiftlb $7, %k2, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload +; FASTISEL-NEXT: kandb %k4, %k0, %k0 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $1, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kandb %k4, %k2, %k1 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 +; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 -; FASTISEL-NEXT: kshiftrb $6, %k2, %k2 -; FASTISEL-NEXT: korb %k2, %k1, %k1 +; FASTISEL-NEXT: kshiftrb $7, %k2, %k2 +; FASTISEL-NEXT: korb %k1, %k2, %k1 +; FASTISEL-NEXT: kandb %k3, %k1, %k1 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; FASTISEL-NEXT: kandb %k5, %k1, %k1 ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 ; FASTISEL-NEXT: kshiftrb $5, %k2, %k2 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 ; FASTISEL-NEXT: korb %k2, %k1, %k1 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k4, %k1, %k1 -; FASTISEL-NEXT: kshiftlb $7, %k3, %k2 +; FASTISEL-NEXT: kandb %k7, %k1, %k1 +; FASTISEL-NEXT: kmovq %k7, %k3 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 ; FASTISEL-NEXT: kshiftrb $4, %k2, %k2 ; FASTISEL-NEXT: korb %k2, %k1, %k1 -; FASTISEL-NEXT: kandb %k6, %k1, %k1 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; FASTISEL-NEXT: kandb %k5, %k1, %k1 ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 ; FASTISEL-NEXT: kshiftrb $3, %k2, %k2 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 ; FASTISEL-NEXT: korb %k2, %k1, %k1 -; FASTISEL-NEXT: kandb %k7, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 +; FASTISEL-NEXT: kandb %k6, %k1, %k1 +; FASTISEL-NEXT: kmovq %k6, %k5 +; FASTISEL-NEXT: kshiftlb $7, %k7, %k2 ; FASTISEL-NEXT: kshiftrb $2, %k2, %k2 ; FASTISEL-NEXT: korb %k2, %k1, %k1 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k3, %k1, %k1 +; FASTISEL-NEXT: kandb %k4, %k1, %k1 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 ; FASTISEL-NEXT: kshiftrb $1, %k2, %k2 @@ -3434,52 +3421,52 @@ ; FASTISEL-NEXT: kandb %k0, %k1, %k0 ; FASTISEL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k5, %k0, %k0 +; FASTISEL-NEXT: kshiftlb $7, %k0, %k0 +; FASTISEL-NEXT: kshiftrb $6, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 -; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 -; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k1, %k0, %k0 +; FASTISEL-NEXT: kshiftrb $7, %k1, %k1 +; FASTISEL-NEXT: korb %k0, %k1, %k0 +; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; FASTISEL-NEXT: kandb %k7, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $5, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kandb %k4, %k0, %k0 +; FASTISEL-NEXT: kandb %k3, %k0, %k0 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $4, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; FASTISEL-NEXT: kandb %k6, %k0, %k0 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 ; FASTISEL-NEXT: kshiftrb $3, %k1, %k1 -; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 -; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kandb %k7, %k0, %k0 -; FASTISEL-NEXT: kshiftlb $7, %k2, %k1 -; FASTISEL-NEXT: kshiftrb $2, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kandb %k3, %k0, %k0 -; FASTISEL-NEXT: kmovq %k3, %k7 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 +; FASTISEL-NEXT: kandb %k5, %k0, %k0 ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 +; FASTISEL-NEXT: kshiftrb $2, %k1, %k1 +; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 +; FASTISEL-NEXT: korb %k1, %k0, %k0 +; FASTISEL-NEXT: kandb %k4, %k0, %k0 +; FASTISEL-NEXT: kshiftlb $7, %k2, %k1 ; FASTISEL-NEXT: kshiftrb $1, %k1, %k1 ; FASTISEL-NEXT: korb %k1, %k0, %k0 -; FASTISEL-NEXT: kmovd %esi, %k1 -; FASTISEL-NEXT: kandb %k5, %k1, %k1 -; FASTISEL-NEXT: kmovd %edx, %k2 +; FASTISEL-NEXT: kmovd %edx, %k1 +; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 +; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 +; FASTISEL-NEXT: kmovd %esi, %k2 ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 -; FASTISEL-NEXT: kshiftrb $6, %k2, %k2 -; FASTISEL-NEXT: korb %k2, %k1, %k1 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k2, %k1, %k1 +; FASTISEL-NEXT: kshiftrb $7, %k2, %k2 +; FASTISEL-NEXT: korb %k1, %k2, %k1 +; FASTISEL-NEXT: kandb %k7, %k1, %k1 ; FASTISEL-NEXT: kmovd %ecx, %k2 ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 ; FASTISEL-NEXT: kshiftrb $5, %k2, %k2 ; FASTISEL-NEXT: korb %k2, %k1, %k1 -; FASTISEL-NEXT: kandb %k4, %k1, %k1 +; FASTISEL-NEXT: kandb %k3, %k1, %k1 ; FASTISEL-NEXT: kmovd %r8d, %k2 ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 ; FASTISEL-NEXT: kshiftrb $4, %k2, %k2 @@ -3490,12 +3477,11 @@ ; FASTISEL-NEXT: kshiftrb $3, %k2, %k2 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 ; FASTISEL-NEXT: korb %k2, %k1, %k1 -; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; FASTISEL-NEXT: kandb %k2, %k1, %k1 +; FASTISEL-NEXT: kandb %k5, %k1, %k1 ; FASTISEL-NEXT: kshiftlb $7, %k3, %k2 ; FASTISEL-NEXT: kshiftrb $2, %k2, %k2 ; FASTISEL-NEXT: korb %k2, %k1, %k1 -; FASTISEL-NEXT: kandb %k7, %k1, %k1 +; FASTISEL-NEXT: kandb %k4, %k1, %k1 ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 ; FASTISEL-NEXT: kshiftrb $1, %k2, %k2 diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll --- a/llvm/test/CodeGen/X86/avx512-ext.ll +++ b/llvm/test/CodeGen/X86/avx512-ext.ll @@ -1886,13 +1886,9 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-LABEL: test21: ; KNL: # %bb.0: -; KNL-NEXT: movw $-3, %ax -; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k2 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 @@ -1900,8 +1896,9 @@ ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-5, %ax ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k1, %k7 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 @@ -1917,9 +1914,9 @@ ; KNL-NEXT: kshiftrw $12, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-17, %ax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 @@ -1928,7 +1925,7 @@ ; KNL-NEXT: movw $-33, %ax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k3 +; KNL-NEXT: kmovw %k1, %k2 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 @@ -1947,7 +1944,7 @@ ; KNL-NEXT: movw $-129, %ax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k4 +; KNL-NEXT: kmovw %k1, %k3 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 @@ -1966,7 +1963,7 @@ ; KNL-NEXT: movw $-513, %ax # imm = 0xFDFF ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k5 +; KNL-NEXT: kmovw %k1, %k4 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 @@ -1983,9 +1980,8 @@ ; KNL-NEXT: kshiftrw $5, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-2049, %ax # imm = 0xF7FF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k5 +; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 @@ -2014,218 +2010,216 @@ ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: kandw %k0, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $14, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kshiftlw $1, %k1, %k1 ; KNL-NEXT: kshiftrw $1, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: kandw %k2, %k1, %k1 -; KNL-NEXT: kmovw %esi, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: andl $1, %edi +; KNL-NEXT: kmovw %esi, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $14, %k1, %k1 +; KNL-NEXT: kmovw %edi, %k6 +; KNL-NEXT: korw %k1, %k6, %k1 +; KNL-NEXT: kandw %k7, %k1, %k1 +; KNL-NEXT: kmovw %edx, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $13, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload +; KNL-NEXT: kandw %k7, %k1, %k1 +; KNL-NEXT: kmovw %ecx, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $12, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload ; KNL-NEXT: kandw %k0, %k1, %k1 -; KNL-NEXT: kmovw %edx, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $13, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %r8d, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $11, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 +; KNL-NEXT: kandw %k2, %k1, %k1 +; KNL-NEXT: kmovw %r9d, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $10, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload ; KNL-NEXT: kandw %k2, %k1, %k1 -; KNL-NEXT: kmovw %ecx, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $12, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: kandw %k6, %k1, %k1 -; KNL-NEXT: kmovw %r8d, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $11, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: kandw %k3, %k1, %k1 -; KNL-NEXT: kmovw %r9d, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; KNL-NEXT: kandw %k6, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: kandw %k4, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $9, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 +; KNL-NEXT: kandw %k3, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $8, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $8, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload ; KNL-NEXT: kandw %k3, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $7, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: kandw %k5, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $7, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 +; KNL-NEXT: kandw %k4, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $6, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $6, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload ; KNL-NEXT: kandw %k4, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $5, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $5, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 +; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: kandw %k5, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $4, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; KNL-NEXT: kandw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $4, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload +; KNL-NEXT: kandw %k6, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $3, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; KNL-NEXT: kandw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $3, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload +; KNL-NEXT: kandw %k6, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $2, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; KNL-NEXT: kandw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $2, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload +; KNL-NEXT: kandw %k6, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $14, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kshiftlw $1, %k1, %k1 ; KNL-NEXT: kshiftrw $1, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl +; KNL-NEXT: kmovw %ecx, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $14, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: korw %k1, %k6, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload +; KNL-NEXT: kandw %k6, %k1, %k1 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $13, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k7, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $12, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k0, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $13, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: kandw %k2, %k1, %k1 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $12, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $11, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload ; KNL-NEXT: kandw %k0, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $11, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $10, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k2, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: kandw %k6, %k1, %k1 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; KNL-NEXT: kandw %k2, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $9, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload +; KNL-NEXT: kandw %k0, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $8, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $8, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k3, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $7, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $7, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload ; KNL-NEXT: kandw %k3, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $6, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $6, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k4, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $5, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $5, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k5, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $4, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $4, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload +; KNL-NEXT: kandw %k0, %k1, %k1 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $3, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload ; KNL-NEXT: kandw %k2, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $3, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; KNL-NEXT: kandw %k5, %k1, %k1 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $2, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $2, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; KNL-NEXT: kandw %k5, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $14, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kshiftlw $1, %k1, %k1 ; KNL-NEXT: kshiftrw $1, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: korw %k7, %k1, %k1 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; KNL-NEXT: kandw %k5, %k7, %k7 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: korw %k6, %k1, %k1 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl +; KNL-NEXT: kmovw %ecx, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $14, %k6, %k6 +; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: korw %k6, %k7, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 @@ -2241,35 +2235,36 @@ ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $12, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kandw %k0, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $11, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; KNL-NEXT: kandw %k0, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $10, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; KNL-NEXT: kandw %k0, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $9, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; KNL-NEXT: kandw %k0, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $8, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; KNL-NEXT: kandw %k0, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 @@ -2287,21 +2282,20 @@ ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $5, %k6, %k6 ; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; KNL-NEXT: kandw %k0, %k5, %k4 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload +; KNL-NEXT: kandw %k3, %k5, %k4 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k5 ; KNL-NEXT: kshiftlw $15, %k5, %k5 ; KNL-NEXT: kshiftrw $4, %k5, %k5 ; KNL-NEXT: korw %k5, %k4, %k4 -; KNL-NEXT: kandw %k2, %k4, %k3 +; KNL-NEXT: kandw %k0, %k4, %k3 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k4 ; KNL-NEXT: kshiftlw $15, %k4, %k4 ; KNL-NEXT: kshiftrw $3, %k4, %k4 ; KNL-NEXT: korw %k4, %k3, %k3 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload -; KNL-NEXT: kandw %k0, %k3, %k2 +; KNL-NEXT: kandw %k2, %k3, %k2 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k3 ; KNL-NEXT: kshiftlw $15, %k3, %k3 @@ -2346,13 +2340,9 @@ ; ; AVX512DQNOBW-LABEL: test21: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: movw $-3, %ax -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQNOBW-NEXT: andl $1, %eax ; AVX512DQNOBW-NEXT: kmovw %eax, %k0 -; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %k1, %k2 -; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 @@ -2360,8 +2350,9 @@ ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movw $-5, %ax ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %k1, %k7 +; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 @@ -2377,9 +2368,9 @@ ; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movw $-17, %ax -; AVX512DQNOBW-NEXT: kmovw %eax, %k6 -; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; AVX512DQNOBW-NEXT: kmovw %eax, %k1 +; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 @@ -2388,7 +2379,7 @@ ; AVX512DQNOBW-NEXT: movw $-33, %ax ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %k1, %k3 +; AVX512DQNOBW-NEXT: kmovw %k1, %k2 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 @@ -2407,7 +2398,7 @@ ; AVX512DQNOBW-NEXT: movw $-129, %ax ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %k1, %k4 +; AVX512DQNOBW-NEXT: kmovw %k1, %k3 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 @@ -2426,7 +2417,7 @@ ; AVX512DQNOBW-NEXT: movw $-513, %ax # imm = 0xFDFF ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %k1, %k5 +; AVX512DQNOBW-NEXT: kmovw %k1, %k4 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 @@ -2443,9 +2434,8 @@ ; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k1 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movw $-2049, %ax # imm = 0xF7FF -; AVX512DQNOBW-NEXT: kmovw %eax, %k1 -; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k5 +; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 @@ -2474,218 +2464,216 @@ ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: kmovw %edi, %k0 -; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %esi, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $14, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: andl $1, %edi +; AVX512DQNOBW-NEXT: kmovw %esi, %k0 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0 +; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %edi, %k6 +; AVX512DQNOBW-NEXT: korw %k0, %k6, %k0 +; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %edx, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $13, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %ecx, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $12, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %edx, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %r8d, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $11, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %r9d, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $10, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %ecx, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %r8d, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw %r9d, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $9, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $8, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $7, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 +; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $6, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $5, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $4, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $4, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $3, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $3, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $2, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $2, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k0 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload +; AVX512DQNOBW-NEXT: andl $1, %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %cl +; AVX512DQNOBW-NEXT: kmovw %ecx, %k0 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0 +; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: korw %k0, %k6, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $13, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $14, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $12, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $11, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $10, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 -; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $9, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $8, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $7, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $6, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $5, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $4, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $4, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $3, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $3, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 -; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: kshiftrw $2, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: kshiftrw $2, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $14, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %eax, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQNOBW-NEXT: kmovw %eax, %k7 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k5, %k7, %k7 -; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQNOBW-NEXT: andl $1, %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %cl +; AVX512DQNOBW-NEXT: kmovw %ecx, %k6 +; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k6, %k6 +; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: korw %k6, %k7, %k6 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 @@ -2701,35 +2689,36 @@ ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 @@ -2747,21 +2736,20 @@ ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k5, %k5 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k1, %k5, %k4 +; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload +; AVX512DQNOBW-NEXT: kandw %k3, %k5, %k4 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k5 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k5, %k5 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k5, %k5 ; AVX512DQNOBW-NEXT: korw %k5, %k4, %k4 -; AVX512DQNOBW-NEXT: kandw %k2, %k4, %k3 +; AVX512DQNOBW-NEXT: kandw %k1, %k4, %k3 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k4 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k4, %k4 ; AVX512DQNOBW-NEXT: korw %k4, %k3, %k3 -; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload -; AVX512DQNOBW-NEXT: kandw %k1, %k3, %k2 +; AVX512DQNOBW-NEXT: kandw %k2, %k3, %k2 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k3 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3 diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -2915,20 +2915,17 @@ ; ; KNL-LABEL: store_64i1: ; KNL: ## %bb.0: -; KNL-NEXT: movw $-3, %ax -; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: andl $1, %esi ; KNL-NEXT: kmovw %esi, %k0 -; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k2 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kmovw %edx, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $14, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-5, %ax ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k1, %k7 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $13, %k1, %k1 @@ -2942,9 +2939,9 @@ ; KNL-NEXT: kshiftrw $12, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-17, %ax -; KNL-NEXT: kmovw %eax, %k6 -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %r9d, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $11, %k1, %k1 @@ -2952,7 +2949,7 @@ ; KNL-NEXT: movw $-33, %ax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k3 +; KNL-NEXT: kmovw %k1, %k2 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 @@ -2971,7 +2968,7 @@ ; KNL-NEXT: movw $-129, %ax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k4 +; KNL-NEXT: kmovw %k1, %k3 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 @@ -2990,7 +2987,7 @@ ; KNL-NEXT: movw $-513, %ax ## imm = 0xFDFF ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: kmovw %k1, %k5 +; KNL-NEXT: kmovw %k1, %k4 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 @@ -3007,9 +3004,8 @@ ; KNL-NEXT: kshiftrw $5, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-2049, %ax ## imm = 0xF7FF -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k5 +; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 @@ -3038,224 +3034,222 @@ ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $14, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: kandw %k2, %k0, %k0 +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl +; KNL-NEXT: kmovw %ecx, %k0 +; KNL-NEXT: kshiftlw $15, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: korw %k0, %k6, %k0 +; KNL-NEXT: kandw %k7, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $13, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; KNL-NEXT: kandw %k7, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $12, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $13, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $11, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $12, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k6, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $10, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $11, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $9, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $8, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $8, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $7, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k5, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $7, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $6, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $6, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload ; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $5, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $5, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $4, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $4, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; KNL-NEXT: kandw %k6, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $3, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $3, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; KNL-NEXT: kandw %k6, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $2, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; KNL-NEXT: kandw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $2, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; KNL-NEXT: kandw %k6, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $14, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl +; KNL-NEXT: kmovw %ecx, %k0 +; KNL-NEXT: kshiftlw $15, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: korw %k0, %k6, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; KNL-NEXT: kandw %k6, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $13, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k7, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $12, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $13, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $12, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $11, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $11, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $10, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $10, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $9, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; KNL-NEXT: kandw %k2, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $9, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $8, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $8, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $7, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $7, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $6, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $6, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $5, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $5, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $4, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $4, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $3, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $3, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: kshiftrw $2, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: kshiftrw $2, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $14, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 +; KNL-NEXT: kmovw %eax, %k6 +; KNL-NEXT: kshiftlw $14, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kshiftlw $15, %k7, %k7 -; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al -; KNL-NEXT: kmovw %eax, %k7 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; KNL-NEXT: kandw %k5, %k7, %k7 -; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 +; KNL-NEXT: korw %k6, %k0, %k0 +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl +; KNL-NEXT: kmovw %ecx, %k6 +; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $14, %k6, %k6 +; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: korw %k6, %k7, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 @@ -3271,35 +3265,36 @@ ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $12, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kandw %k1, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $11, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $10, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $9, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $8, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k6, %k6 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 @@ -3317,21 +3312,20 @@ ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $5, %k6, %k6 ; KNL-NEXT: korw %k6, %k5, %k5 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k5, %k4 +; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload +; KNL-NEXT: kandw %k3, %k5, %k4 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k5 ; KNL-NEXT: kshiftlw $15, %k5, %k5 ; KNL-NEXT: kshiftrw $4, %k5, %k5 ; KNL-NEXT: korw %k5, %k4, %k4 -; KNL-NEXT: kandw %k2, %k4, %k3 +; KNL-NEXT: kandw %k1, %k4, %k3 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k4 ; KNL-NEXT: kshiftlw $15, %k4, %k4 ; KNL-NEXT: kshiftrw $3, %k4, %k4 ; KNL-NEXT: korw %k4, %k3, %k3 -; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; KNL-NEXT: kandw %k1, %k3, %k2 +; KNL-NEXT: kandw %k2, %k3, %k2 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k3 ; KNL-NEXT: kshiftlw $15, %k3, %k3 @@ -3375,20 +3369,17 @@ ; ; AVX512DQ-LABEL: store_64i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: movw $-3, %ax -; AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: andl $1, %esi ; AVX512DQ-NEXT: kmovw %esi, %k0 -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k1, %k2 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kmovw %edx, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $14, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-5, %ax ; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 +; AVX512DQ-NEXT: kmovw %k1, %k7 +; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kmovw %ecx, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $13, %k1, %k1 @@ -3402,9 +3393,9 @@ ; AVX512DQ-NEXT: kshiftrw $12, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-17, %ax -; AVX512DQ-NEXT: kmovw %eax, %k6 -; AVX512DQ-NEXT: kandw %k6, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %r9d, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $11, %k1, %k1 @@ -3412,7 +3403,7 @@ ; AVX512DQ-NEXT: movw $-33, %ax ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k1, %k3 +; AVX512DQ-NEXT: kmovw %k1, %k2 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 @@ -3431,7 +3422,7 @@ ; AVX512DQ-NEXT: movw $-129, %ax ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k1, %k4 +; AVX512DQ-NEXT: kmovw %k1, %k3 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 @@ -3450,7 +3441,7 @@ ; AVX512DQ-NEXT: movw $-513, %ax ## imm = 0xFDFF ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k1, %k5 +; AVX512DQ-NEXT: kmovw %k1, %k4 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 @@ -3467,9 +3458,8 @@ ; AVX512DQ-NEXT: kshiftrw $5, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-2049, %ax ## imm = 0xF7FF -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: kandw %k1, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k5 +; AVX512DQ-NEXT: kandw %k5, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 @@ -3498,224 +3488,222 @@ ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $14, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k0 -; AVX512DQ-NEXT: kandw %k2, %k0, %k0 +; AVX512DQ-NEXT: andl $1, %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %cl +; AVX512DQ-NEXT: kmovw %ecx, %k0 +; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0 +; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: korw %k0, %k6, %k0 +; AVX512DQ-NEXT: kandw %k7, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $14, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $13, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k7, %k0, %k0 +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $12, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $13, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $11, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $12, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: kandw %k6, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $10, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $11, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $9, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k3, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $10, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k6, %k0, %k0 -; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $9, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: kandw %k4, %k0, %k0 -; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $8, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $8, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k3, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $7, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: kandw %k5, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $7, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 +; AVX512DQ-NEXT: kandw %k4, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $6, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $6, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k4, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $5, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 +; AVX512DQ-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k5, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $4, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $4, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k6, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $3, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $3, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k6, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $2, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $2, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k6, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $14, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload +; AVX512DQ-NEXT: andl $1, %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %cl +; AVX512DQ-NEXT: kmovw %ecx, %k0 +; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0 +; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: korw %k0, %k6, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k6, %k0, %k0 +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $13, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k7, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $14, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $12, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $13, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: kandw %k2, %k0, %k0 -; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $12, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $11, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $11, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $10, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $10, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: kandw %k6, %k0, %k0 -; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $9, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k2, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $9, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $8, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $8, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k3, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $7, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $7, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k3, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $6, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $6, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k4, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $5, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k5, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $4, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $4, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k1, %k0, %k0 +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $3, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $3, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k5, %k0, %k0 -; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: kshiftrw $2, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: kshiftrw $2, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $14, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 +; AVX512DQ-NEXT: kmovw %eax, %k6 +; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 -; AVX512DQ-NEXT: korw %k7, %k0, %k0 -; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al -; AVX512DQ-NEXT: kmovw %eax, %k7 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k5, %k7, %k7 -; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 +; AVX512DQ-NEXT: korw %k6, %k0, %k0 +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al +; AVX512DQ-NEXT: andl $1, %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %cl +; AVX512DQ-NEXT: kmovw %ecx, %k6 +; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $14, %k6, %k6 +; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: korw %k6, %k7, %k6 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k6, %k6 @@ -3731,35 +3719,36 @@ ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $12, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 -; AVX512DQ-NEXT: kandw %k1, %k6, %k6 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k5, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $11, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k1, %k6, %k6 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k5, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $10, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k1, %k6, %k6 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k5, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $9, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k1, %k6, %k6 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k5, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $8, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k1, %k6, %k6 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k5, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 @@ -3777,21 +3766,20 @@ ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k5, %k5 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k1, %k5, %k4 +; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload +; AVX512DQ-NEXT: kandw %k3, %k5, %k4 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k5 ; AVX512DQ-NEXT: kshiftlw $15, %k5, %k5 ; AVX512DQ-NEXT: kshiftrw $4, %k5, %k5 ; AVX512DQ-NEXT: korw %k5, %k4, %k4 -; AVX512DQ-NEXT: kandw %k2, %k4, %k3 +; AVX512DQ-NEXT: kandw %k1, %k4, %k3 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k4 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4 ; AVX512DQ-NEXT: kshiftrw $3, %k4, %k4 ; AVX512DQ-NEXT: korw %k4, %k3, %k3 -; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload -; AVX512DQ-NEXT: kandw %k1, %k3, %k2 +; AVX512DQ-NEXT: kandw %k2, %k3, %k2 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k3 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3 diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll --- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -1130,11 +1130,10 @@ ; CHECK-NEXT: vmovdqa 32(%rdi), %ymm0 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k0 ; CHECK-NEXT: kshiftrd $1, %k0, %k1 -; CHECK-NEXT: movq $-3, %rax -; CHECK-NEXT: kmovq %rax, %k2 -; CHECK-NEXT: kandq %k2, %k1, %k1 ; CHECK-NEXT: kshiftlq $63, %k0, %k2 ; CHECK-NEXT: kshiftrq $62, %k2, %k2 +; CHECK-NEXT: kshiftlq $63, %k1, %k1 +; CHECK-NEXT: kshiftrq $63, %k1, %k1 ; CHECK-NEXT: korq %k2, %k1, %k1 ; CHECK-NEXT: movq $-5, %rax ; CHECK-NEXT: kmovq %rax, %k2 diff --git a/llvm/test/CodeGen/X86/vec-strict-cmp-128-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-cmp-128-fp16.ll --- a/llvm/test/CodeGen/X86/vec-strict-cmp-128-fp16.ll +++ b/llvm/test/CodeGen/X86/vec-strict-cmp-128-fp16.ll @@ -709,15 +709,14 @@ ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $16, %esp -; X86-NEXT: movb $-3, %al -; X86-NEXT: kmovd %eax, %k0 ; X86-NEXT: vucomish 8(%ebp), %xmm2 ; X86-NEXT: setnp %al ; X86-NEXT: sete %cl ; X86-NEXT: testb %al, %cl ; X86-NEXT: setne %al -; X86-NEXT: kmovd %eax, %k1 -; X86-NEXT: kandb %k0, %k1, %k0 +; X86-NEXT: kmovd %eax, %k0 +; X86-NEXT: kshiftlb $7, %k0, %k0 +; X86-NEXT: kshiftrb $7, %k0, %k0 ; X86-NEXT: vpsrld $16, %xmm2, %xmm2 ; X86-NEXT: vucomish 10(%ebp), %xmm2 ; X86-NEXT: setnp %al @@ -741,9 +740,8 @@ ; X64-NEXT: testb %al, %cl ; X64-NEXT: setne %al ; X64-NEXT: kmovd %eax, %k0 -; X64-NEXT: movb $-3, %al -; X64-NEXT: kmovd %eax, %k1 -; X64-NEXT: kandb %k1, %k0, %k0 +; X64-NEXT: kshiftlb $7, %k0, %k0 +; X64-NEXT: kshiftrb $7, %k0, %k0 ; X64-NEXT: vpsrld $16, %xmm3, %xmm3 ; X64-NEXT: vpsrld $16, %xmm2, %xmm2 ; X64-NEXT: vucomish %xmm3, %xmm2 @@ -771,12 +769,11 @@ ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $16, %esp -; X86-NEXT: movb $-3, %al -; X86-NEXT: kmovd %eax, %k0 ; X86-NEXT: vcomish 8(%ebp), %xmm2 ; X86-NEXT: seta %al -; X86-NEXT: kmovd %eax, %k1 -; X86-NEXT: kandb %k0, %k1, %k0 +; X86-NEXT: kmovd %eax, %k0 +; X86-NEXT: kshiftlb $7, %k0, %k0 +; X86-NEXT: kshiftrb $7, %k0, %k0 ; X86-NEXT: vpsrld $16, %xmm2, %xmm2 ; X86-NEXT: vcomish 10(%ebp), %xmm2 ; X86-NEXT: seta %al @@ -791,12 +788,11 @@ ; ; X64-LABEL: test_v2f16_ogt_q: ; X64: # %bb.0: -; X64-NEXT: movb $-3, %al -; X64-NEXT: kmovd %eax, %k0 ; X64-NEXT: vcomish %xmm3, %xmm2 ; X64-NEXT: seta %al -; X64-NEXT: kmovd %eax, %k1 -; X64-NEXT: kandb %k0, %k1, %k0 +; X64-NEXT: kmovd %eax, %k0 +; X64-NEXT: kshiftlb $7, %k0, %k0 +; X64-NEXT: kshiftrb $7, %k0, %k0 ; X64-NEXT: vpsrld $16, %xmm3, %xmm3 ; X64-NEXT: vpsrld $16, %xmm2, %xmm2 ; X64-NEXT: vcomish %xmm3, %xmm2 @@ -821,12 +817,11 @@ ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $16, %esp -; X86-NEXT: movb $-3, %al -; X86-NEXT: kmovd %eax, %k0 ; X86-NEXT: vucomish 8(%ebp), %xmm2 ; X86-NEXT: setae %al -; X86-NEXT: kmovd %eax, %k1 -; X86-NEXT: kandb %k0, %k1, %k0 +; X86-NEXT: kmovd %eax, %k0 +; X86-NEXT: kshiftlb $7, %k0, %k0 +; X86-NEXT: kshiftrb $7, %k0, %k0 ; X86-NEXT: vpsrld $16, %xmm2, %xmm3 ; X86-NEXT: vucomish 10(%ebp), %xmm3 ; X86-NEXT: setae %al @@ -861,12 +856,11 @@ ; ; X64-LABEL: test_v4f16_oge_q: ; X64: # %bb.0: -; X64-NEXT: movb $-3, %al -; X64-NEXT: kmovd %eax, %k0 ; X64-NEXT: vucomish %xmm3, %xmm2 ; X64-NEXT: setae %al -; X64-NEXT: kmovd %eax, %k1 -; X64-NEXT: kandb %k0, %k1, %k0 +; X64-NEXT: kmovd %eax, %k0 +; X64-NEXT: kshiftlb $7, %k0, %k0 +; X64-NEXT: kshiftrb $7, %k0, %k0 ; X64-NEXT: vpsrld $16, %xmm3, %xmm4 ; X64-NEXT: vpsrld $16, %xmm2, %xmm5 ; X64-NEXT: vucomish %xmm4, %xmm5 @@ -913,13 +907,12 @@ ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $16, %esp -; X86-NEXT: movb $-3, %al -; X86-NEXT: kmovd %eax, %k0 ; X86-NEXT: vmovsh 8(%ebp), %xmm3 ; X86-NEXT: vcomish %xmm2, %xmm3 ; X86-NEXT: seta %al -; X86-NEXT: kmovd %eax, %k1 -; X86-NEXT: kandb %k0, %k1, %k0 +; X86-NEXT: kmovd %eax, %k0 +; X86-NEXT: kshiftlb $7, %k0, %k0 +; X86-NEXT: kshiftrb $7, %k0, %k0 ; X86-NEXT: vpsrld $16, %xmm2, %xmm3 ; X86-NEXT: vmovsh 10(%ebp), %xmm4 ; X86-NEXT: vcomish %xmm3, %xmm4 @@ -957,12 +950,11 @@ ; ; X64-LABEL: test_v4f16_olt_q: ; X64: # %bb.0: -; X64-NEXT: movb $-3, %al -; X64-NEXT: kmovd %eax, %k0 ; X64-NEXT: vcomish %xmm2, %xmm3 ; X64-NEXT: seta %al -; X64-NEXT: kmovd %eax, %k1 -; X64-NEXT: kandb %k0, %k1, %k0 +; X64-NEXT: kmovd %eax, %k0 +; X64-NEXT: kshiftlb $7, %k0, %k0 +; X64-NEXT: kshiftrb $7, %k0, %k0 ; X64-NEXT: vpsrld $16, %xmm2, %xmm4 ; X64-NEXT: vpsrld $16, %xmm3, %xmm5 ; X64-NEXT: vcomish %xmm4, %xmm5 diff --git a/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll b/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll --- a/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll +++ b/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll @@ -97,12 +97,10 @@ ; AVX512-32-NEXT: movl %esp, %ebp ; AVX512-32-NEXT: andl $-16, %esp ; AVX512-32-NEXT: subl $16, %esp -; AVX512-32-NEXT: movw $-3, %ax -; AVX512-32-NEXT: kmovw %eax, %k0 ; AVX512-32-NEXT: vcomiss 8(%ebp), %xmm2 ; AVX512-32-NEXT: seta %al -; AVX512-32-NEXT: kmovw %eax, %k1 -; AVX512-32-NEXT: kandw %k0, %k1, %k0 +; AVX512-32-NEXT: andl $1, %eax +; AVX512-32-NEXT: kmovw %eax, %k0 ; AVX512-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX512-32-NEXT: vcomiss 12(%ebp), %xmm2 ; AVX512-32-NEXT: seta %al @@ -117,12 +115,10 @@ ; ; AVX512-64-LABEL: test_v2f32_ogt_s: ; AVX512-64: # %bb.0: -; AVX512-64-NEXT: movw $-3, %ax -; AVX512-64-NEXT: kmovw %eax, %k0 ; AVX512-64-NEXT: vcomiss %xmm3, %xmm2 ; AVX512-64-NEXT: seta %al -; AVX512-64-NEXT: kmovw %eax, %k1 -; AVX512-64-NEXT: kandw %k0, %k1, %k0 +; AVX512-64-NEXT: andl $1, %eax +; AVX512-64-NEXT: kmovw %eax, %k0 ; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3] ; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX512-64-NEXT: vcomiss %xmm3, %xmm2 @@ -142,12 +138,10 @@ ; AVX512F-32-NEXT: subl $16, %esp ; AVX512F-32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-32-NEXT: movw $-3, %ax -; AVX512F-32-NEXT: kmovw %eax, %k0 ; AVX512F-32-NEXT: vcomiss 8(%ebp), %xmm2 ; AVX512F-32-NEXT: seta %al -; AVX512F-32-NEXT: kmovw %eax, %k1 -; AVX512F-32-NEXT: kandw %k0, %k1, %k0 +; AVX512F-32-NEXT: andl $1, %eax +; AVX512F-32-NEXT: kmovw %eax, %k0 ; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX512F-32-NEXT: vcomiss 12(%ebp), %xmm2 ; AVX512F-32-NEXT: seta %al @@ -166,12 +160,10 @@ ; AVX512F-64: # %bb.0: ; AVX512F-64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-64-NEXT: movw $-3, %ax -; AVX512F-64-NEXT: kmovw %eax, %k0 ; AVX512F-64-NEXT: vcomiss %xmm3, %xmm2 ; AVX512F-64-NEXT: seta %al -; AVX512F-64-NEXT: kmovw %eax, %k1 -; AVX512F-64-NEXT: kandw %k0, %k1, %k0 +; AVX512F-64-NEXT: andl $1, %eax +; AVX512F-64-NEXT: kmovw %eax, %k0 ; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3] ; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX512F-64-NEXT: vcomiss %xmm3, %xmm2 @@ -288,15 +280,13 @@ ; AVX512-32-NEXT: movl %esp, %ebp ; AVX512-32-NEXT: andl $-16, %esp ; AVX512-32-NEXT: subl $16, %esp -; AVX512-32-NEXT: movw $-3, %ax -; AVX512-32-NEXT: kmovw %eax, %k0 ; AVX512-32-NEXT: vucomiss 8(%ebp), %xmm2 ; AVX512-32-NEXT: setnp %al ; AVX512-32-NEXT: sete %cl ; AVX512-32-NEXT: testb %al, %cl ; AVX512-32-NEXT: setne %al -; AVX512-32-NEXT: kmovw %eax, %k1 -; AVX512-32-NEXT: kandw %k0, %k1, %k0 +; AVX512-32-NEXT: andl $1, %eax +; AVX512-32-NEXT: kmovw %eax, %k0 ; AVX512-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX512-32-NEXT: vucomiss 12(%ebp), %xmm2 ; AVX512-32-NEXT: setnp %al @@ -319,10 +309,8 @@ ; AVX512-64-NEXT: sete %cl ; AVX512-64-NEXT: testb %al, %cl ; AVX512-64-NEXT: setne %al +; AVX512-64-NEXT: andl $1, %eax ; AVX512-64-NEXT: kmovw %eax, %k0 -; AVX512-64-NEXT: movw $-3, %ax -; AVX512-64-NEXT: kmovw %eax, %k1 -; AVX512-64-NEXT: kandw %k1, %k0, %k0 ; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3] ; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX512-64-NEXT: vucomiss %xmm3, %xmm2 @@ -345,15 +333,13 @@ ; AVX512F-32-NEXT: subl $16, %esp ; AVX512F-32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-32-NEXT: movw $-3, %ax -; AVX512F-32-NEXT: kmovw %eax, %k0 ; AVX512F-32-NEXT: vucomiss 8(%ebp), %xmm2 ; AVX512F-32-NEXT: setnp %al ; AVX512F-32-NEXT: sete %cl ; AVX512F-32-NEXT: testb %al, %cl ; AVX512F-32-NEXT: setne %al -; AVX512F-32-NEXT: kmovw %eax, %k1 -; AVX512F-32-NEXT: kandw %k0, %k1, %k0 +; AVX512F-32-NEXT: andl $1, %eax +; AVX512F-32-NEXT: kmovw %eax, %k0 ; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX512F-32-NEXT: vucomiss 12(%ebp), %xmm2 ; AVX512F-32-NEXT: setnp %al @@ -380,10 +366,8 @@ ; AVX512F-64-NEXT: sete %cl ; AVX512F-64-NEXT: testb %al, %cl ; AVX512F-64-NEXT: setne %al +; AVX512F-64-NEXT: andl $1, %eax ; AVX512F-64-NEXT: kmovw %eax, %k0 -; AVX512F-64-NEXT: movw $-3, %ax -; AVX512F-64-NEXT: kmovw %eax, %k1 -; AVX512F-64-NEXT: kandw %k1, %k0, %k0 ; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3] ; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX512F-64-NEXT: vucomiss %xmm3, %xmm2 diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll --- a/llvm/test/CodeGen/X86/vec_smulo.ll +++ b/llvm/test/CodeGen/X86/vec_smulo.ll @@ -2926,10 +2926,8 @@ ; AVX512F-NEXT: vmovq %rcx, %xmm1 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; AVX512F-NEXT: seto %al -; AVX512F-NEXT: movw $-3, %cx -; AVX512F-NEXT: kmovw %ecx, %k0 -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kandw %k0, %k1, %k0 +; AVX512F-NEXT: andl $1, %eax +; AVX512F-NEXT: kmovw %eax, %k0 ; AVX512F-NEXT: kmovw %edx, %k1 ; AVX512F-NEXT: kshiftlw $15, %k1, %k1 ; AVX512F-NEXT: kshiftrw $14, %k1, %k1 @@ -2952,10 +2950,8 @@ ; AVX512BW-NEXT: vmovq %rcx, %xmm1 ; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; AVX512BW-NEXT: seto %al -; AVX512BW-NEXT: movw $-3, %cx -; AVX512BW-NEXT: kmovd %ecx, %k0 -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: kandw %k0, %k1, %k0 +; AVX512BW-NEXT: andl $1, %eax +; AVX512BW-NEXT: kmovw %eax, %k0 ; AVX512BW-NEXT: kmovd %edx, %k1 ; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512BW-NEXT: kshiftrw $14, %k1, %k1 diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll --- a/llvm/test/CodeGen/X86/vec_umulo.ll +++ b/llvm/test/CodeGen/X86/vec_umulo.ll @@ -2604,10 +2604,8 @@ ; AVX512F-NEXT: vmovq %rax, %xmm1 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; AVX512F-NEXT: seto %al -; AVX512F-NEXT: movw $-3, %cx -; AVX512F-NEXT: kmovw %ecx, %k0 -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kandw %k0, %k1, %k0 +; AVX512F-NEXT: andl $1, %eax +; AVX512F-NEXT: kmovw %eax, %k0 ; AVX512F-NEXT: kmovw %r8d, %k1 ; AVX512F-NEXT: kshiftlw $15, %k1, %k1 ; AVX512F-NEXT: kshiftrw $14, %k1, %k1 @@ -2631,10 +2629,8 @@ ; AVX512BW-NEXT: vmovq %rax, %xmm1 ; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; AVX512BW-NEXT: seto %al -; AVX512BW-NEXT: movw $-3, %cx -; AVX512BW-NEXT: kmovd %ecx, %k0 -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: kandw %k0, %k1, %k0 +; AVX512BW-NEXT: andl $1, %eax +; AVX512BW-NEXT: kmovw %eax, %k0 ; AVX512BW-NEXT: kmovd %r8d, %k1 ; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512BW-NEXT: kshiftrw $14, %k1, %k1