Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -35481,19 +35481,26 @@ {Chain, LHS, RHS}, VT, MMO); } -// TEST (AND a, b) ,(AND a, b) -> TEST a, b static SDValue combineTestM(SDNode *N, SelectionDAG &DAG) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); - if (Op0 != Op1 || Op1->getOpcode() != ISD::AND) - return SDValue(); - EVT VT = N->getValueType(0); SDLoc DL(N); - return DAG.getNode(X86ISD::TESTM, DL, VT, - Op0->getOperand(0), Op0->getOperand(1)); + // TEST (AND a, b) ,(AND a, b) -> TEST a, b + if (Op0 == Op1 && Op1->getOpcode() == ISD::AND) + return DAG.getNode(X86ISD::TESTM, DL, VT, Op0->getOperand(0), + Op0->getOperand(1)); + + // TEST op0, BUILD_VECTOR(all_zero) -> BUILD_VECTOR(all_zero) + // TEST BUILD_VECTOR(all_zero) -> BUILD_VECTOR(all_zero) + + if (ISD::isBuildVectorAllZeros(Op0.getNode()) || + ISD::isBuildVectorAllZeros(Op1.getNode())) + return DAG.getSplatBuildVector(VT, DL, DAG.getConstant(0, DL, MVT::i8)); + + return SDValue(); } static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG, Index: test/CodeGen/X86/avx512vl-vec-masked-cmp.ll =================================================================== --- test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -40,8 +40,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -166,8 +165,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -295,8 +293,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -425,8 +422,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -555,8 +551,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -686,8 +681,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -820,8 +814,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -955,8 +948,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -1083,8 +1075,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -1133,8 +1124,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -1181,7 +1171,6 @@ ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 -; NoVLX-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 @@ -1195,7 +1184,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -1243,10 +1232,9 @@ ; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4 -; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 @@ -1257,7 +1245,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -1425,8 +1413,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -1501,8 +1488,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -1580,8 +1566,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -1660,8 +1645,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -1740,8 +1724,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -1821,8 +1804,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -1905,8 +1887,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -1990,8 +1971,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -2091,8 +2071,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -2218,8 +2197,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -2348,8 +2326,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -2479,8 +2456,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -2610,8 +2586,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -2742,8 +2717,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -2877,8 +2851,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -3013,8 +2986,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -3443,8 +3415,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -3710,8 +3681,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -3908,29 +3878,28 @@ ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm8 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 +; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} -; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm4 -; NoVLX-NEXT: vpmovdb %zmm6, %xmm6 +; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm3 +; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm8, %ymm2 +; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm4, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 @@ -3996,85 +3965,85 @@ ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3 -; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 +; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 +; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpand %xmm6, %xmm2, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0 +; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -4139,57 +4108,57 @@ ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 @@ -4200,160 +4169,159 @@ ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm2 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3 +; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm5, %ymm3 -; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm3, %ymm3 -; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2 +; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm2, %ymm2 +; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 +; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 +; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %eax, %xmm3 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm4, %ymm4 -; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm4 -; NoVLX-NEXT: vpslld $31, %zmm4, %zmm4 -; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 +; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm3, %ymm3 +; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 +; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 +; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -5535,8 +5503,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -5585,8 +5552,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -5655,8 +5621,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -5727,8 +5692,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -5783,8 +5747,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -5855,8 +5818,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -6102,8 +6064,7 @@ ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -6178,8 +6139,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -6258,8 +6218,7 @@ ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -6339,8 +6298,7 @@ ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -6419,8 +6377,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -6500,8 +6457,7 @@ ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -6581,8 +6537,7 @@ ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -6662,8 +6617,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -6747,8 +6701,7 @@ ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -6833,8 +6786,7 @@ ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -6918,8 +6870,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -7004,8 +6955,7 @@ ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -7103,8 +7053,7 @@ ; NoVLX-NEXT: .Lcfi255: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -7227,8 +7176,7 @@ ; NoVLX-NEXT: .Lcfi263: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -7354,8 +7302,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -7482,8 +7429,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -7610,8 +7556,7 @@ ; NoVLX-NEXT: .Lcfi287: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -7738,8 +7683,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -7867,8 +7811,7 @@ ; NoVLX-NEXT: .Lcfi303: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -7996,8 +7939,7 @@ ; NoVLX-NEXT: .Lcfi311: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -8128,8 +8070,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -8261,8 +8202,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -8394,8 +8334,7 @@ ; NoVLX-NEXT: .Lcfi335: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -8527,8 +8466,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -9735,8 +9673,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -9785,8 +9722,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -9847,8 +9783,7 @@ ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -9911,8 +9846,7 @@ ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -9967,8 +9901,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -10031,8 +9964,7 @@ ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -11265,8 +11197,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -11317,8 +11248,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -11389,8 +11319,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -11463,8 +11392,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -11521,8 +11449,7 @@ ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -11595,8 +11522,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -11816,8 +11742,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -11890,8 +11815,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -11967,8 +11891,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -12045,8 +11968,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -12123,8 +12045,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -12201,8 +12122,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -12280,8 +12200,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -12359,8 +12278,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -12441,8 +12359,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -12524,8 +12441,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -12607,8 +12523,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -12690,8 +12605,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -12791,8 +12705,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -12917,8 +12830,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -13046,8 +12958,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -13176,8 +13087,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -13306,8 +13216,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -13437,8 +13346,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -13571,8 +13479,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -13706,8 +13613,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -13834,8 +13740,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -13884,8 +13789,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -13932,7 +13836,6 @@ ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 -; NoVLX-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 @@ -13946,7 +13849,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -13994,10 +13897,9 @@ ; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4 -; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 @@ -14008,7 +13910,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -14176,8 +14078,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -14252,8 +14153,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -14331,8 +14231,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -14411,8 +14310,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -14491,8 +14389,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -14572,8 +14469,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -14656,8 +14552,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -14741,8 +14636,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -14842,8 +14736,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -14969,8 +14862,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -15099,8 +14991,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -15230,8 +15121,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -15361,8 +15251,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -15493,8 +15382,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -15628,8 +15516,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -15764,8 +15651,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -16194,8 +16080,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -16461,8 +16346,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -16659,29 +16543,28 @@ ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm8 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 +; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm4 -; NoVLX-NEXT: vpmovdb %zmm6, %xmm6 +; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm3 +; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm8, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 @@ -16747,85 +16630,85 @@ ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3 -; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 +; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 +; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpand %xmm6, %xmm2, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0 +; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -16890,57 +16773,57 @@ ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 @@ -16951,160 +16834,159 @@ ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm2 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3 +; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm5, %ymm3 -; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm3, %ymm3 -; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2 +; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm2, %ymm2 +; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 +; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 +; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %eax, %xmm3 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm4, %ymm4 -; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm4 -; NoVLX-NEXT: vpslld $31, %zmm4, %zmm4 -; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 +; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm3, %ymm3 +; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 +; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 +; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -18286,8 +18168,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -18336,8 +18217,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -18406,8 +18286,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -18478,8 +18357,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -18534,8 +18412,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -18606,8 +18483,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -18853,8 +18729,7 @@ ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -18929,8 +18804,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -19009,8 +18883,7 @@ ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -19090,8 +18963,7 @@ ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -19170,8 +19042,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -19251,8 +19122,7 @@ ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -19332,8 +19202,7 @@ ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -19413,8 +19282,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -19498,8 +19366,7 @@ ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -19584,8 +19451,7 @@ ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -19669,8 +19535,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -19755,8 +19620,7 @@ ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -19854,8 +19718,7 @@ ; NoVLX-NEXT: .Lcfi707: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -19978,8 +19841,7 @@ ; NoVLX-NEXT: .Lcfi715: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -20105,8 +19967,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -20233,8 +20094,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -20361,8 +20221,7 @@ ; NoVLX-NEXT: .Lcfi739: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -20489,8 +20348,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -20618,8 +20476,7 @@ ; NoVLX-NEXT: .Lcfi755: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -20747,8 +20604,7 @@ ; NoVLX-NEXT: .Lcfi763: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -20879,8 +20735,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -21012,8 +20867,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -21145,8 +20999,7 @@ ; NoVLX-NEXT: .Lcfi787: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -21278,8 +21131,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -22486,8 +22338,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -22536,8 +22387,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -22598,8 +22448,7 @@ ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -22662,8 +22511,7 @@ ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -22718,8 +22566,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -22782,8 +22629,7 @@ ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -24016,8 +23862,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -24068,8 +23913,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -24140,8 +23984,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -24214,8 +24057,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -24272,8 +24114,7 @@ ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -24346,8 +24187,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -24567,8 +24407,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -24641,8 +24480,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -24718,8 +24556,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -24796,8 +24633,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -24874,8 +24710,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -24952,8 +24787,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -25031,8 +24865,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -25110,8 +24943,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -25192,8 +25024,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -25275,8 +25106,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -25358,8 +25188,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -25441,8 +25270,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -25544,8 +25372,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -25673,8 +25500,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -25804,8 +25630,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -25937,8 +25762,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -26069,8 +25893,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -26203,8 +26026,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -26339,8 +26161,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -26477,8 +26298,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -26607,8 +26427,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -26660,8 +26479,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -26708,7 +26526,6 @@ ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 -; NoVLX-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 @@ -26724,7 +26541,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -26772,13 +26589,12 @@ ; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; NoVLX-NEXT: vmovdqa (%rsi), %ymm4 -; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm4, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 -; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4 -; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm3 +; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm3, %ymm0 +; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 +; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 @@ -26789,7 +26605,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -26969,8 +26785,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -27048,8 +26863,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -27129,8 +26943,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -27212,8 +27025,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -27294,8 +27106,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -27378,8 +27189,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -27464,8 +27274,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -27552,8 +27361,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -27655,8 +27463,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -27785,8 +27592,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -27917,8 +27723,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -28051,8 +27856,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -28184,8 +27988,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -28319,8 +28122,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -28456,8 +28258,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -28595,8 +28396,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -29028,8 +28828,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -29300,8 +29099,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -29498,31 +29296,30 @@ ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm8 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 +; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm4 -; NoVLX-NEXT: vpmovdb %zmm6, %xmm6 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm3 +; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm8, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 -; NoVLX-NEXT: vpxor %ymm5, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 +; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 @@ -29588,86 +29385,86 @@ ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3 -; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3 +; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 +; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 +; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpand %xmm6, %xmm2, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0 +; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -29732,81 +29529,147 @@ ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm4 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm2 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2 +; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm5, %ymm3 -; NoVLX-NEXT: vmovdqa (%rsi), %ymm5 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm5, %ymm3 -; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm5 -; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm5, %ymm4 -; NoVLX-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 -; NoVLX-NEXT: vpxor %ymm5, %ymm3, %ymm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm4 +; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm5 +; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm3 +; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 +; NoVLX-NEXT: vpxor %ymm4, %ymm5, %ymm2 +; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 +; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 +; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftlw $15, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kshiftlw $13, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kshiftlw $12, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kshiftlw $11, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kshiftlw $10, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kshiftlw $9, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kshiftlw $8, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kshiftlw $7, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kshiftlw $6, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kshiftlw $5, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kshiftlw $4, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kshiftlw $3, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kshiftlw $2, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kshiftlw $1, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kshiftrw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3 ; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 ; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 @@ -29873,85 +29736,18 @@ ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm4 -; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm4 -; NoVLX-NEXT: vpslld $31, %zmm4, %zmm4 -; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -31165,8 +30961,7 @@ ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -31218,8 +31013,7 @@ ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -31288,8 +31082,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -31361,8 +31154,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -31420,8 +31212,7 @@ ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -31493,8 +31284,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -31742,8 +31532,7 @@ ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -31818,8 +31607,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -31898,8 +31686,7 @@ ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -31979,8 +31766,7 @@ ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -32060,8 +31846,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -32142,8 +31927,7 @@ ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -32223,8 +32007,7 @@ ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -32304,8 +32087,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -32389,8 +32171,7 @@ ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -32475,8 +32256,7 @@ ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -32561,8 +32341,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -32648,8 +32427,7 @@ ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -32747,8 +32525,7 @@ ; NoVLX-NEXT: .Lcfi1159: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -32871,8 +32648,7 @@ ; NoVLX-NEXT: .Lcfi1167: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -32998,8 +32774,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -33126,8 +32901,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -33256,8 +33030,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -33386,8 +33159,7 @@ ; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -33515,8 +33287,7 @@ ; NoVLX-NEXT: .Lcfi1207: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -33644,8 +33415,7 @@ ; NoVLX-NEXT: .Lcfi1215: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -33776,8 +33546,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -33909,8 +33678,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -34044,8 +33812,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -34179,8 +33946,7 @@ ; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -35429,8 +35195,7 @@ ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -35482,8 +35247,7 @@ ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -35544,8 +35308,7 @@ ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -35609,8 +35372,7 @@ ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -35668,8 +35430,7 @@ ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -35733,8 +35494,7 @@ ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -37017,8 +36777,7 @@ ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -37072,8 +36831,7 @@ ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -37146,8 +36904,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -37223,8 +36980,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -37284,8 +37040,7 @@ ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -37361,8 +37116,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -37586,8 +37340,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -37660,8 +37413,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -37737,8 +37489,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -37815,8 +37566,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -37895,8 +37645,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -37975,8 +37724,7 @@ ; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -38054,8 +37802,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -38133,8 +37880,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -38215,8 +37961,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -38298,8 +38043,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -38383,8 +38127,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -38468,8 +38211,7 @@ ; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -38572,8 +38314,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -38701,8 +38442,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -38833,8 +38573,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -38966,8 +38705,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -39099,8 +38837,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -39233,8 +38970,7 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -39370,8 +39106,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -39508,8 +39243,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -39639,8 +39373,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -39692,8 +39425,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -39740,10 +39472,9 @@ ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 -; NoVLX-NEXT: vxorps %xmm4, %xmm4, %xmm4 -; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; NoVLX-NEXT: vpxor %ymm5, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm5, %ymm1, %ymm1 +; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 @@ -39757,7 +39488,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -39805,13 +39536,12 @@ ; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm4, %ymm4 -; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm4, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4 -; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 +; NoVLX-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor (%rsi), %ymm3, %ymm3 +; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm3, %ymm0 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 @@ -39822,7 +39552,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -40005,8 +39735,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -40084,8 +39813,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -40166,8 +39894,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -40249,8 +39976,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -40332,8 +40058,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -40416,8 +40141,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -40503,8 +40227,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -40591,8 +40314,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -40695,8 +40417,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -40825,8 +40546,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -40958,8 +40678,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -41092,8 +40811,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -41226,8 +40944,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -41361,8 +41078,7 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -41499,8 +41215,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -41638,8 +41353,7 @@ ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -42073,8 +41787,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -42345,8 +42058,7 @@ ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -42513,60 +42225,59 @@ ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm8 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6 +; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm4 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} -; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm5, %ymm5 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm7 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 -; NoVLX-NEXT: vmovdqa {{.*#+}} ymm6 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] -; NoVLX-NEXT: vpxor %ymm6, %ymm4, %ymm3 -; NoVLX-NEXT: vpxor %ymm6, %ymm2, %ymm2 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 +; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] +; NoVLX-NEXT: vpxor %ymm5, %ymm6, %ymm3 +; NoVLX-NEXT: vpxor %ymm5, %ymm2, %ymm2 ; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 @@ -42633,10 +42344,10 @@ ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm4 -; NoVLX-NEXT: vpxor %ymm6, %ymm8, %ymm2 -; NoVLX-NEXT: vpxor %ymm6, %ymm5, %ymm3 -; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 +; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3 +; NoVLX-NEXT: vpxor %ymm5, %ymm8, %ymm2 +; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm4 +; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 @@ -42703,18 +42414,18 @@ ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpand %xmm7, %xmm2, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: vpand %xmm1, %xmm2, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm0, %xmm4, %xmm0 +; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -42763,8 +42474,8 @@ ; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm4 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm5 +; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax @@ -42777,9 +42488,9 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm5, %rcx +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 @@ -42787,7 +42498,7 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpextrq $1, %xmm5, %rax +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx @@ -42797,9 +42508,9 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 @@ -42807,7 +42518,7 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx @@ -42819,7 +42530,7 @@ ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 @@ -42836,169 +42547,168 @@ ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm7 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm6 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm2 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 +; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm5, %ymm4 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm7, %xmm3 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm3, %ymm3 -; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] -; NoVLX-NEXT: vpxor %ymm5, %ymm3, %ymm3 -; NoVLX-NEXT: vpxor (%rsi), %ymm5, %ymm6 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm6, %ymm3 -; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm6, %xmm2 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 +; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] +; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2 +; NoVLX-NEXT: vpxor (%rsi), %ymm4, %ymm5 +; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm5, %ymm2 +; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 +; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 +; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %eax, %xmm3 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm4 -; NoVLX-NEXT: vpxor 32(%rsi), %ymm5, %ymm5 -; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm5, %ymm4 -; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm4 -; NoVLX-NEXT: vpslld $31, %zmm4, %zmm4 -; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 +; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3 +; NoVLX-NEXT: vpxor 32(%rsi), %ymm4, %ymm4 +; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3 +; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 +; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 +; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -44237,8 +43947,7 @@ ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -44290,8 +43999,7 @@ ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -44363,8 +44071,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -44438,8 +44145,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -44497,8 +44203,7 @@ ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -44572,8 +44277,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -44819,8 +44523,7 @@ ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -44895,8 +44598,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -44975,8 +44677,7 @@ ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -45056,8 +44757,7 @@ ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -45136,8 +44836,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -45217,8 +44916,7 @@ ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -45298,8 +44996,7 @@ ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -45379,8 +45076,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -45464,8 +45160,7 @@ ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -45550,8 +45245,7 @@ ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -45635,8 +45329,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -45721,8 +45414,7 @@ ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -45820,8 +45512,7 @@ ; NoVLX-NEXT: .Lcfi1611: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -45944,8 +45635,7 @@ ; NoVLX-NEXT: .Lcfi1619: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -46071,8 +45761,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -46199,8 +45888,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -46327,8 +46015,7 @@ ; NoVLX-NEXT: .Lcfi1643: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -46455,8 +46142,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -46584,8 +46270,7 @@ ; NoVLX-NEXT: .Lcfi1659: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -46713,8 +46398,7 @@ ; NoVLX-NEXT: .Lcfi1667: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -46845,8 +46529,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -46978,8 +46661,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -47111,8 +46793,7 @@ ; NoVLX-NEXT: .Lcfi1691: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -47244,8 +46925,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -48527,8 +48207,7 @@ ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -48580,8 +48259,7 @@ ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -48645,8 +48323,7 @@ ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -48712,8 +48389,7 @@ ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -48771,8 +48447,7 @@ ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -48838,8 +48513,7 @@ ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -50129,8 +49803,7 @@ ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -50184,8 +49857,7 @@ ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -50259,8 +49931,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -50336,8 +50007,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -50397,8 +50067,7 @@ ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -50474,8 +50143,7 @@ ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -50695,8 +50363,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -50769,8 +50436,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -50846,8 +50512,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -50924,8 +50589,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -51002,8 +50666,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -51080,8 +50743,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -51159,8 +50821,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -51238,8 +50899,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -51320,8 +50980,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -51403,8 +51062,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -51486,8 +51144,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -51569,8 +51226,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -52685,8 +52341,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -52735,8 +52390,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -52787,8 +52441,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -52845,10 +52498,9 @@ ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vpmovqd %zmm2, %ymm2 -; NoVLX-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -52905,10 +52557,9 @@ ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpmovqd %zmm1, %ymm1 -; NoVLX-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -52966,11 +52617,10 @@ ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpmovqd %zmm1, %ymm1 -; NoVLX-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm3 -; NoVLX-NEXT: vcmpeqps %xmm3, %xmm0, %xmm0 +; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 +; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -53216,8 +52866,7 @@ ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -53292,8 +52941,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -53369,8 +53017,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -53450,8 +53097,7 @@ ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -53531,8 +53177,7 @@ ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -53613,8 +53258,7 @@ ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -53695,8 +53339,7 @@ ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -53776,8 +53419,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -53858,8 +53500,7 @@ ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -53944,8 +53585,7 @@ ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -54030,8 +53670,7 @@ ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -54117,8 +53756,7 @@ ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -54217,8 +53855,7 @@ ; NoVLX-NEXT: .Lcfi1887: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -54341,8 +53978,7 @@ ; NoVLX-NEXT: .Lcfi1895: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -54466,8 +54102,7 @@ ; NoVLX-NEXT: .Lcfi1903: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -54594,8 +54229,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -54722,8 +54356,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -54851,8 +54484,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -55027,8 +54659,7 @@ ; NoVLX-NEXT: .Lcfi1935: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -55156,8 +54787,7 @@ ; NoVLX-NEXT: .Lcfi1943: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -55286,8 +54916,7 @@ ; NoVLX-NEXT: .Lcfi1951: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -55419,8 +55048,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -55552,8 +55180,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -55686,8 +55313,7 @@ ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -56886,8 +56512,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -56936,8 +56561,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -56988,8 +56612,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -57045,10 +56668,9 @@ ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -57104,10 +56726,9 @@ ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -57164,11 +56785,10 @@ ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; NoVLX-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm3, %xmm0, %xmm0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -58293,8 +57913,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -58345,8 +57964,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -58399,8 +58017,7 @@ ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -58458,11 +58075,10 @@ ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vpmovqd %zmm2, %ymm2 -; NoVLX-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -58520,11 +58136,10 @@ ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpmovqd %zmm1, %ymm1 -; NoVLX-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -58583,12 +58198,11 @@ ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpmovqd %zmm1, %ymm1 -; NoVLX-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm3 -; NoVLX-NEXT: vcmpeqpd %ymm3, %ymm0, %ymm0 +; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 +; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -58862,8 +58476,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -58936,8 +58549,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -59011,8 +58623,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -59089,8 +58700,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -59167,8 +58777,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -59246,8 +58855,7 @@ ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 @@ -59374,8 +58982,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -59453,8 +59060,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -59533,8 +59139,7 @@ ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -59616,8 +59221,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -59699,8 +59303,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -59783,8 +59386,7 @@ ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)