diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3955,6 +3955,37 @@
     if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
             VT, N0, N1, Cond, DCI, dl))
       return CC;
+
+    // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
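+    // For example, with 64-bit elements where the upper 32 bits of x are
+    // known zero (Lo = 0xffffffff):
+    //   all bits zero: (x | (y << 32)) ==  0  -->  (x | (y & Lo)) == 0
+    //   all bits set:  (x | (y << 32)) == -1  -->  (x & (y & Lo)) == Lo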
+    bool CmpZero = N1C->getAPIntValue().isNullValue();
+    bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
+    if ((CmpZero || CmpNegOne) && N0.getOpcode() == ISD::OR &&
+        N0.hasOneUse()) {
+      SDValue LHS = N0.getOperand(0);
+      SDValue RHS = N0.getOperand(1);
+      if (RHS.getOpcode() != ISD::SHL)
+        std::swap(LHS, RHS);
+      unsigned OpEltBits = N1.getScalarValueSizeInBits();
+      unsigned OpHalfBits = OpEltBits / 2;
+      APInt UpperBits = APInt::getHighBitsSet(OpEltBits, OpHalfBits);
+      // Check for even bitwidth + unshifted element must have zero upper bits.
+      if ((OpEltBits % 2) == 0 && RHS.getOpcode() == ISD::SHL &&
+          isa<ConstantSDNode>(RHS.getOperand(1)) &&
+          RHS.getConstantOperandAPInt(1) == OpHalfBits &&
+          DAG.MaskedValueIsZero(LHS, UpperBits)) {
+        SDValue LowerBits = DAG.getConstant(~UpperBits, dl, OpVT);
+        SDValue UpperMask =
+            DAG.getNode(ISD::AND, dl, OpVT, RHS.getOperand(0), LowerBits);
+        SDValue NewN0 = DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT,
+                                    LHS, UpperMask);
+        SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LowerBits;
+        return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
+      }
+    }
   }
 
   // If we have "setcc X, C0", check to see if we can shrink the immediate
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -2148,18 +2148,15 @@
 ;
 ; KNL-LABEL: ktest_2:
 ; KNL: ## %bb.0:
-; KNL-NEXT: vcmpgtps 64(%rdi), %zmm1, %k1
-; KNL-NEXT: vcmpgtps (%rdi), %zmm0, %k2
-; KNL-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z}
-; KNL-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z}
-; KNL-NEXT: vcmpltps %zmm3, %zmm1, %k0
-; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k3
+; KNL-NEXT: vcmpgtps (%rdi), %zmm0, %k1
+; KNL-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2
+; KNL-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
+; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
+; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0
+; KNL-NEXT: vcmpltps %zmm2, %zmm1, %k3
 ; KNL-NEXT: korw %k3, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
 ; KNL-NEXT: korw %k0, %k1, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: kortestw %k2, %k0
 ; KNL-NEXT: je LBB45_2
 ; KNL-NEXT: ## %bb.1: ## %L1
 ; KNL-NEXT: vmovaps %zmm0, (%rdi)
@@ -2220,18 +2217,15 @@
 ;
 ; AVX512DQ-LABEL: ktest_2:
 ; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: vcmpgtps 64(%rdi), %zmm1, %k1
-; AVX512DQ-NEXT: vcmpgtps (%rdi), %zmm0, %k2
-; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z}
-; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z}
-; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm1, %k0
-; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm0, %k3
+; AVX512DQ-NEXT: vcmpgtps (%rdi), %zmm0, %k1
+; AVX512DQ-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2
+; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
+; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
+; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm0, %k0
+; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm1, %k3
 ; AVX512DQ-NEXT: korw %k3, %k2, %k2
-; AVX512DQ-NEXT: kmovw %k2, %eax
 ; AVX512DQ-NEXT: korw %k0, %k1, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ecx
-; AVX512DQ-NEXT: shll $16, %ecx
-; AVX512DQ-NEXT: orl %eax, %ecx
+; AVX512DQ-NEXT: kortestw %k2, %k0
 ; AVX512DQ-NEXT: je LBB45_2
 ; AVX512DQ-NEXT: ## %bb.1: ## %L1
 ; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi)
@@ -4861,15 +4855,12 @@
 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm3, %ymm3
 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; KNL-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2
-; KNL-NEXT: vpmovsxwd %ymm2, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm0
+; KNL-NEXT: vpor %ymm0, %ymm2, %ymm0
 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpslld $31, %zmm0, %zmm0
 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: kortestw %k0, %k0
 ; KNL-NEXT: je LBB77_1
 ; KNL-NEXT: ## %bb.2: ## %exit
 ; KNL-NEXT: vzeroupper
@@ -4945,15 +4936,12 @@
 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm3, %ymm3
 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512DQ-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2
-; AVX512DQ-NEXT: vpmovsxwd %ymm2, %zmm0
-; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
+; AVX512DQ-NEXT: vpor %ymm0, %ymm2, %ymm0
 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ecx
-; AVX512DQ-NEXT: shll $16, %ecx
-; AVX512DQ-NEXT: orl %eax, %ecx
+; AVX512DQ-NEXT: kortestw %k0, %k0
 ; AVX512DQ-NEXT: je LBB77_1
 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
 ; AVX512DQ-NEXT: vzeroupper
@@ -5027,11 +5015,10 @@
 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm3, %ymm3
 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; KNL-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2
-; KNL-NEXT: vpmovmskb %ymm2, %eax
 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm0
-; KNL-NEXT: vpmovmskb %ymm0, %ecx
-; KNL-NEXT: shlq $32, %rcx
-; KNL-NEXT: orq %rax, %rcx
+; KNL-NEXT: vpor %ymm0, %ymm2, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: testl %eax, %eax
 ; KNL-NEXT: je LBB78_1
 ; KNL-NEXT: ## %bb.2: ## %exit
 ; KNL-NEXT: vzeroupper
@@ -5107,11 +5094,10 @@
 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm3, %ymm3
 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512DQ-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2
-; AVX512DQ-NEXT: vpmovmskb %ymm2, %eax
 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
-; AVX512DQ-NEXT: vpmovmskb %ymm0, %ecx
-; AVX512DQ-NEXT: shlq $32, %rcx
-; AVX512DQ-NEXT: orq %rax, %rcx
+; AVX512DQ-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512DQ-NEXT: vpmovmskb %ymm0, %eax
+; AVX512DQ-NEXT: testl %eax, %eax
 ; AVX512DQ-NEXT: je LBB78_1
 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
 ; AVX512DQ-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -49,22 +49,18 @@
 define i1 @allones_v32i8_sign(<32 x i8> %arg) {
 ; SSE-LABEL: allones_v32i8_sign:
 ; SSE: # %bb.0:
+; SSE-NEXT: pand %xmm1, %xmm0
 ; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: pmovmskb %xmm1, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: cmpl $-1, %ecx
+; SSE-NEXT: cmpw $-1, %ax
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: allones_v32i8_sign:
 ; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; AVX1-NEXT: shll $16, %ecx
-; AVX1-NEXT: orl %eax, %ecx
-; AVX1-NEXT: cmpl $-1, %ecx
+; AVX1-NEXT: cmpw $-1, %ax
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -93,20 +89,18 @@
 define i1 @allzeros_v32i8_sign(<32 x i8> %arg) {
 ; SSE-LABEL: allzeros_v32i8_sign:
 ; SSE: # %bb.0:
+; SSE-NEXT: por %xmm1, %xmm0
 ; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: pmovmskb %xmm1, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: orl %eax, %ecx
+; SSE-NEXT: testl %eax, %eax
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: allzeros_v32i8_sign:
 ; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; AVX1-NEXT: shll $16, %ecx
-; AVX1-NEXT: orl %eax, %ecx
+; AVX1-NEXT: testl %eax, %eax
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -143,9 +137,8 @@
 ; SSE-NEXT: pmovmskb %xmm3, %edx
 ; SSE-NEXT: shll $16, %edx
 ; SSE-NEXT: orl %eax, %edx
-; SSE-NEXT: shlq $32, %rdx
-; SSE-NEXT: orq %rcx, %rdx
-; SSE-NEXT: cmpq $-1, %rdx
+; SSE-NEXT: andl %ecx, %edx
+; SSE-NEXT: cmpl $-1, %edx
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
@@ -161,20 +154,17 @@
 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
 ; AVX1-NEXT: shll $16, %edx
 ; AVX1-NEXT: orl %eax, %edx
-; AVX1-NEXT: shlq $32, %rdx
-; AVX1-NEXT: orq %rcx, %rdx
-; AVX1-NEXT: cmpq $-1, %rdx
+; AVX1-NEXT: andl %ecx, %edx
+; AVX1-NEXT: cmpl $-1, %edx
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: allones_v64i8_sign:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovmskb %ymm1, %eax
-; AVX2-NEXT: shlq $32, %rax
-; AVX2-NEXT: vpmovmskb %ymm0, %ecx
-; AVX2-NEXT: orq %rax, %rcx
-; AVX2-NEXT: cmpq $-1, %rcx
+; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: cmpl $-1, %eax
 ; AVX2-NEXT: sete %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -182,11 +172,9 @@
 ; KNL-LABEL: allones_v64i8_sign:
 ; KNL: # %bb.0:
 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; KNL-NEXT: vpmovmskb %ymm1, %eax
-; KNL-NEXT: shlq $32, %rax
-; KNL-NEXT: vpmovmskb %ymm0, %ecx
-; KNL-NEXT: orq %rax, %rcx
-; KNL-NEXT: cmpq $-1, %rcx
+; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: cmpl $-1, %eax
 ; KNL-NEXT: sete %al
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq
@@ -207,43 +195,40 @@
 define i1 @allzeros_v64i8_sign(<64 x i8> %arg) {
 ; SSE-LABEL: allzeros_v64i8_sign:
 ; SSE: # %bb.0:
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: pmovmskb %xmm1, %ecx
+; SSE-NEXT: pmovmskb %xmm2, %eax
+; SSE-NEXT: pmovmskb %xmm3, %ecx
 ; SSE-NEXT: shll $16, %ecx
 ; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: pmovmskb %xmm2, %eax
-; SSE-NEXT: pmovmskb %xmm3, %edx
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: pmovmskb %xmm1, %edx
 ; SSE-NEXT: shll $16, %edx
 ; SSE-NEXT: orl %eax, %edx
-; SSE-NEXT: shlq $32, %rdx
-; SSE-NEXT: orq %rcx, %rdx
+; SSE-NEXT: orl %ecx, %edx
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: allzeros_v64i8_sign:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: vpmovmskb %xmm1, %eax
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpmovmskb %xmm1, %ecx
 ; AVX1-NEXT: shll $16, %ecx
 ; AVX1-NEXT: orl %eax, %ecx
-; AVX1-NEXT: vpmovmskb %xmm1, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpmovmskb %xmm0, %eax
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
 ; AVX1-NEXT: shll $16, %edx
 ; AVX1-NEXT: orl %eax, %edx
-; AVX1-NEXT: shlq $32, %rdx
-; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: orl %ecx, %edx
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: allzeros_v64i8_sign:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovmskb %ymm1, %eax
-; AVX2-NEXT: shlq $32, %rax
-; AVX2-NEXT: vpmovmskb %ymm0, %ecx
-; AVX2-NEXT: orq %rax, %rcx
+; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: testl %eax, %eax
 ; AVX2-NEXT: sete %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -251,10 +236,9 @@
 ; KNL-LABEL: allzeros_v64i8_sign:
 ; KNL: # %bb.0:
 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; KNL-NEXT: vpmovmskb %ymm1, %eax
-; KNL-NEXT: shlq $32, %rax
-; KNL-NEXT: vpmovmskb %ymm0, %ecx
-; KNL-NEXT: orq %rax, %rcx
+; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: testl %eax, %eax
 ; KNL-NEXT: sete %al
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq
@@ -460,12 +444,10 @@
 ; SSE-LABEL: allones_v32i16_sign:
 ; SSE: # %bb.0:
 ; SSE-NEXT: packsswb %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
 ; SSE-NEXT: packsswb %xmm3, %xmm2
-; SSE-NEXT: pmovmskb %xmm2, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: cmpl $-1, %ecx
+; SSE-NEXT: pand %xmm0, %xmm2
+; SSE-NEXT: pmovmskb %xmm2, %eax
+; SSE-NEXT: cmpw $-1, %ax
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
@@ -473,13 +455,11 @@
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
-; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; AVX1-NEXT: shll $16, %ecx
-; AVX1-NEXT: orl %eax, %ecx
-; AVX1-NEXT: cmpl $-1, %ecx
+; AVX1-NEXT: cmpw $-1, %ax
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -505,9 +485,8 @@
 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: cmpl $-1, %ecx
+; KNL-NEXT: andl %eax, %ecx
+; KNL-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
 ; KNL-NEXT: sete %al
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq
@@ -528,25 +507,23 @@
 define i1 @allzeros_v32i16_sign(<32 x i16> %arg) {
 ; SSE-LABEL: allzeros_v32i16_sign:
 ; SSE: # %bb.0:
+; SSE-NEXT: packsswb %xmm3, %xmm2
 ; SSE-NEXT: packsswb %xmm1, %xmm0
+; SSE-NEXT: por %xmm2, %xmm0
 ; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: packsswb %xmm3, %xmm2
-; SSE-NEXT: pmovmskb %xmm2, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: orl %eax, %ecx
+; SSE-NEXT: testl %eax, %eax
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: allzeros_v32i16_sign:
 ; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
-; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; AVX1-NEXT: shll $16, %ecx
-; AVX1-NEXT: orl %eax, %ecx
+; AVX1-NEXT: testl %eax, %eax
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -562,18 +539,14 @@
 ;
 ; KNL-LABEL: allzeros_v32i16_sign:
 ; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
-; KNL-NEXT: vpmovsxwd %ymm2, %zmm2
-; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; KNL-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
+; KNL-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: kortestw %k0, %k0
 ; KNL-NEXT: sete %al
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq
@@ -1274,25 +1247,21 @@
 ; SSE-LABEL: allones_v32i8_and1:
 ; SSE: # %bb.0:
 ; SSE-NEXT: psllw $7, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
 ; SSE-NEXT: psllw $7, %xmm1
-; SSE-NEXT: pmovmskb %xmm1, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: cmpl $-1, %ecx
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: pmovmskb %xmm1, %eax
+; SSE-NEXT: cmpw $-1, %ax
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: allones_v32i8_and1:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm1
-; AVX1-NEXT: vpmovmskb %xmm1, %eax
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; AVX1-NEXT: shll $16, %ecx
-; AVX1-NEXT: orl %eax, %ecx
-; AVX1-NEXT: cmpl $-1, %ecx
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpmovmskb %xmm0, %eax
+; AVX1-NEXT: cmpw $-1, %ax
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -1332,24 +1301,21 @@
 define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
 ; SSE-LABEL: allzeros_v32i8_and1:
 ; SSE: # %bb.0:
+; SSE-NEXT: por %xmm1, %xmm0
 ; SSE-NEXT: psllw $7, %xmm0
 ; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: psllw $7, %xmm1
-; SSE-NEXT: pmovmskb %xmm1, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: orl %eax, %ecx
+; SSE-NEXT: testl %eax, %eax
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: allzeros_v32i8_and1:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllw $7, %xmm0, %xmm1
-; AVX1-NEXT: vpmovmskb %xmm1, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1
 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; AVX1-NEXT: shll $16, %ecx
-; AVX1-NEXT: orl %eax, %ecx
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpmovmskb %xmm0, %eax
+; AVX1-NEXT: testl %eax, %eax
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -1401,9 +1367,8 @@
 ; SSE-NEXT: pmovmskb %xmm3, %edx
 ; SSE-NEXT: shll $16, %edx
 ; SSE-NEXT: orl %eax, %edx
-; SSE-NEXT: shlq $32, %rdx
-; SSE-NEXT: orq %rcx, %rdx
-; SSE-NEXT: cmpq $-1, %rdx
+; SSE-NEXT: andl %ecx, %edx
+; SSE-NEXT: cmpl $-1, %edx
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
@@ -1423,22 +1388,19 @@
 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
 ; AVX1-NEXT: shll $16, %edx
 ; AVX1-NEXT: orl %eax, %edx
-; AVX1-NEXT: shlq $32, %rdx
-; AVX1-NEXT: orq %rcx, %rdx
-; AVX1-NEXT: cmpq $-1, %rdx
+; AVX1-NEXT: andl %ecx, %edx
+; AVX1-NEXT: cmpl $-1, %edx
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: allones_v64i8_and1:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
-; AVX2-NEXT: vpmovmskb %ymm1, %eax
-; AVX2-NEXT: shlq $32, %rax
 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX2-NEXT: vpmovmskb %ymm0, %ecx
-; AVX2-NEXT: orq %rax, %rcx
-; AVX2-NEXT: cmpq $-1, %rcx
+; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: cmpl $-1, %eax
 ; AVX2-NEXT: sete %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -1448,11 +1410,9 @@
 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm1
 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; KNL-NEXT: vpmovmskb %ymm0, %eax
-; KNL-NEXT: shlq $32, %rax
-; KNL-NEXT: vpmovmskb %ymm1, %ecx
-; KNL-NEXT: orq %rax, %rcx
-; KNL-NEXT: cmpq $-1, %rcx
+; KNL-NEXT: cmpl $-1, %eax
 ; KNL-NEXT: sete %al
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq
@@ -1474,66 +1434,61 @@
 define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
 ; SSE-LABEL: allzeros_v64i8_and1:
 ; SSE: # %bb.0:
-; SSE-NEXT: psllw $7, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: psllw $7, %xmm1
-; SSE-NEXT: pmovmskb %xmm1, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: orl %eax, %ecx
 ; SSE-NEXT: psllw $7, %xmm2
 ; SSE-NEXT: pmovmskb %xmm2, %eax
 ; SSE-NEXT: psllw $7, %xmm3
-; SSE-NEXT: pmovmskb %xmm3, %edx
+; SSE-NEXT: pmovmskb %xmm3, %ecx
+; SSE-NEXT: shll $16, %ecx
+; SSE-NEXT: orl %eax, %ecx
+; SSE-NEXT: psllw $7, %xmm0
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: psllw $7, %xmm1
+; SSE-NEXT: pmovmskb %xmm1, %edx
 ; SSE-NEXT: shll $16, %edx
 ; SSE-NEXT: orl %eax, %edx
-; SSE-NEXT: shlq $32, %rdx
-; SSE-NEXT: orq %rcx, %rdx
+; SSE-NEXT: orl %ecx, %edx
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: allzeros_v64i8_and1:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllw $7, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $7, %xmm1, %xmm2
 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1
+; AVX1-NEXT: vpmovmskb %xmm1, %ecx
 ; AVX1-NEXT: shll $16, %ecx
 ; AVX1-NEXT: orl %eax, %ecx
-; AVX1-NEXT: vpsllw $7, %xmm1, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpsllw $7, %xmm0, %xmm1
+; AVX1-NEXT: vpmovmskb %xmm1, %eax
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
 ; AVX1-NEXT: shll $16, %edx
 ; AVX1-NEXT: orl %eax, %edx
-; AVX1-NEXT: shlq $32, %rdx
-; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: orl %ecx, %edx
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: allzeros_v64i8_and1:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
-; AVX2-NEXT: vpmovmskb %ymm1, %eax
-; AVX2-NEXT: shlq $32, %rax
+; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX2-NEXT: vpmovmskb %ymm0, %ecx
-; AVX2-NEXT: orq %rax, %rcx
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: testl %eax, %eax
 ; AVX2-NEXT: sete %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
 ; KNL-LABEL: allzeros_v64i8_and1:
 ; KNL: # %bb.0:
-; KNL-NEXT: vpsllw $7, %ymm0, %ymm1
-; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; KNL-NEXT: vpsllw $7, %ymm1, %ymm1
 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; KNL-NEXT: vpmovmskb %ymm0, %eax
-; KNL-NEXT: shlq $32, %rax
-; KNL-NEXT: vpmovmskb %ymm1, %ecx
-; KNL-NEXT: orq %rax, %rcx
+; KNL-NEXT: testl %eax, %eax
 ; KNL-NEXT: sete %al
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq
@@ -1703,14 +1658,12 @@
 ; SSE-NEXT: psllw $15, %xmm1
 ; SSE-NEXT: psllw $15, %xmm0
 ; SSE-NEXT: packsswb %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
 ; SSE-NEXT: psllw $15, %xmm3
 ; SSE-NEXT: psllw $15, %xmm2
 ; SSE-NEXT: packsswb %xmm3, %xmm2
-; SSE-NEXT: pmovmskb %xmm2, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: cmpl $-1, %ecx
+; SSE-NEXT: pand %xmm0, %xmm2
+; SSE-NEXT: pmovmskb %xmm2, %eax
+; SSE-NEXT: cmpw $-1, %ax
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
@@ -1720,15 +1673,13 @@
 ; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
-; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
 ; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
-; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; AVX1-NEXT: shll $16, %ecx
-; AVX1-NEXT: orl %eax, %ecx
-; AVX1-NEXT: cmpl $-1, %ecx
+; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpmovmskb %xmm0, %eax
+; AVX1-NEXT: cmpw $-1, %ax
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -1757,9 +1708,8 @@
 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: cmpl $-1, %ecx
+; KNL-NEXT: andl %eax, %ecx
+; KNL-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
 ; KNL-NEXT: sete %al
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq
@@ -1781,33 +1731,31 @@
 define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
 ; SSE-LABEL: allzeros_v32i16_and1:
 ; SSE: # %bb.0:
+; SSE-NEXT: psllw $15, %xmm3
+; SSE-NEXT: psllw $15, %xmm2
+; SSE-NEXT: packsswb %xmm3, %xmm2
 ; SSE-NEXT: psllw $15, %xmm1
 ; SSE-NEXT: psllw $15, %xmm0
 ; SSE-NEXT: packsswb %xmm1, %xmm0
+; SSE-NEXT: por %xmm2, %xmm0
 ; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: psllw $15, %xmm3
-; SSE-NEXT: psllw $15, %xmm2
-; SSE-NEXT: packsswb %xmm3, %xmm2
-; SSE-NEXT: pmovmskb %xmm2, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: orl %eax, %ecx
+; SSE-NEXT: testl %eax, %eax
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: allzeros_v32i16_and1:
 ; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
+; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
-; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
-; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
-; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; AVX1-NEXT: shll $16, %ecx
-; AVX1-NEXT: orl %eax, %ecx
+; AVX1-NEXT: testl %eax, %eax
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -1825,19 +1773,15 @@
 ;
 ; KNL-LABEL: allzeros_v32i16_and1:
 ; KNL: # %bb.0:
-; KNL-NEXT: vpsllw $15, %ymm0, %ymm1
+; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
-; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
+; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: kortestw %k0, %k0
 ; KNL-NEXT: sete %al
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq
@@ -2645,25 +2589,21 @@
 ; SSE-LABEL: allones_v32i8_and4:
 ; SSE: # %bb.0:
 ; SSE-NEXT: psllw $5, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
 ; SSE-NEXT: psllw $5, %xmm1
-; SSE-NEXT: pmovmskb %xmm1, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: cmpl $-1, %ecx
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: pmovmskb %xmm1, %eax
+; SSE-NEXT: cmpw $-1, %ax
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: allones_v32i8_and4:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm1
-; AVX1-NEXT: vpmovmskb %xmm1, %eax
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; AVX1-NEXT: shll $16, %ecx
-; AVX1-NEXT: orl %eax, %ecx
-; AVX1-NEXT: cmpl $-1, %ecx
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpmovmskb %xmm0, %eax
+; AVX1-NEXT: cmpw $-1, %ax
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -2703,24 +2643,21 @@
 define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
 ; SSE-LABEL: allzeros_v32i8_and4:
 ; SSE: # %bb.0:
+; SSE-NEXT: por %xmm1, %xmm0
 ; SSE-NEXT: psllw $5, %xmm0
 ; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: psllw $5, %xmm1
-; SSE-NEXT: pmovmskb %xmm1, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: orl %eax, %ecx
+; SSE-NEXT: testl %eax, %eax
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: allzeros_v32i8_and4:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllw $5, %xmm0, %xmm1
-; AVX1-NEXT: vpmovmskb %xmm1, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; AVX1-NEXT: shll $16, %ecx
-; AVX1-NEXT: orl %eax, %ecx
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpmovmskb %xmm0, %eax
+; AVX1-NEXT: testl %eax, %eax
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -2772,9 +2709,8 @@
 ; SSE-NEXT: pmovmskb %xmm3, %edx
 ; SSE-NEXT: shll $16, %edx
 ; SSE-NEXT: orl %eax, %edx
-; SSE-NEXT: shlq $32, %rdx
-; SSE-NEXT: orq %rcx, %rdx
-; SSE-NEXT: cmpq $-1, %rdx
+; SSE-NEXT: andl %ecx, %edx
+; SSE-NEXT: cmpl $-1, %edx
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
@@ -2794,22 +2730,19 @@
 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
 ; AVX1-NEXT: shll $16, %edx
 ; AVX1-NEXT: orl %eax, %edx
-; AVX1-NEXT: shlq $32, %rdx
-; AVX1-NEXT: orq %rcx, %rdx
-; AVX1-NEXT: cmpq $-1, %rdx
+; AVX1-NEXT: andl %ecx, %edx
+; AVX1-NEXT: cmpl $-1, %edx
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: allones_v64i8_and4:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX2-NEXT: vpmovmskb %ymm1, %eax
-; AVX2-NEXT: shlq $32, %rax
 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
-; AVX2-NEXT: vpmovmskb %ymm0, %ecx
-; AVX2-NEXT: orq %rax, %rcx
-; AVX2-NEXT: cmpq $-1, %rcx
+; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: cmpl $-1, %eax
 ; AVX2-NEXT: sete %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -2819,11 +2752,9 @@
 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm1
 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
+; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; KNL-NEXT: vpmovmskb %ymm0, %eax
-; KNL-NEXT: shlq $32, %rax
-; KNL-NEXT: vpmovmskb %ymm1, %ecx
-; KNL-NEXT: orq %rax, %rcx
-; KNL-NEXT: cmpq $-1, %rcx
+; KNL-NEXT: cmpl $-1, %eax
 ; KNL-NEXT: sete %al
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq
@@ -2845,66 +2776,61 @@
 define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
 ; SSE-LABEL: allzeros_v64i8_and4:
 ; SSE: # %bb.0:
-; SSE-NEXT: psllw $5, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: psllw $5, %xmm1
-; SSE-NEXT: pmovmskb %xmm1, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: orl %eax, %ecx
 ; SSE-NEXT: psllw $5, %xmm2
 ; SSE-NEXT: pmovmskb %xmm2, %eax
 ; SSE-NEXT: psllw $5, %xmm3
-; SSE-NEXT: pmovmskb %xmm3, %edx
+; SSE-NEXT: pmovmskb %xmm3, %ecx
+; SSE-NEXT: shll $16, %ecx
+; SSE-NEXT: orl %eax, %ecx
+; SSE-NEXT: psllw $5, %xmm0
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: psllw $5, %xmm1
+; SSE-NEXT: pmovmskb %xmm1, %edx
 ; SSE-NEXT: shll $16, %edx
 ; SSE-NEXT: orl %eax, %edx
-; SSE-NEXT: shlq $32, %rdx
-; SSE-NEXT: orq %rcx, %rdx
+; SSE-NEXT: orl %ecx, %edx
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: allzeros_v64i8_and4:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllw $5, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $5, %xmm1, %xmm2
 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX1-NEXT: vpmovmskb %xmm1, %ecx
 ; AVX1-NEXT: shll $16, %ecx
 ; AVX1-NEXT: orl %eax, %ecx
-; AVX1-NEXT: vpsllw $5, %xmm1, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpsllw $5, %xmm0, %xmm1
+; AVX1-NEXT: vpmovmskb %xmm1, %eax
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
 ; AVX1-NEXT: shll $16, %edx
 ; AVX1-NEXT: orl %eax, %edx
-; AVX1-NEXT: shlq $32, %rdx
-; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: orl %ecx, %edx
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: allzeros_v64i8_and4:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX2-NEXT: vpmovmskb %ymm1, %eax
-; AVX2-NEXT: shlq $32, %rax
+; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
-; AVX2-NEXT: vpmovmskb %ymm0, %ecx
-; AVX2-NEXT: orq %rax, %rcx
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: testl %eax, %eax
 ; AVX2-NEXT: sete %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
 ; KNL-LABEL: allzeros_v64i8_and4:
 ; KNL: # %bb.0:
-; KNL-NEXT: vpsllw $5, %ymm0, %ymm1
-; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; KNL-NEXT: vpsllw $5, %ymm1, %ymm1
 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
+; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; KNL-NEXT: vpmovmskb %ymm0, %eax
-; KNL-NEXT: shlq $32, %rax
-; KNL-NEXT: vpmovmskb %ymm1, %ecx
-; KNL-NEXT: orq %rax, %rcx
+; KNL-NEXT: testl %eax, %eax
 ; KNL-NEXT: sete %al
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq
@@ -3074,14 +3000,12 @@
 ; SSE-NEXT: psllw $13, %xmm1
 ; SSE-NEXT: psllw $13, %xmm0
 ; SSE-NEXT: packsswb %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
 ; SSE-NEXT: psllw $13, %xmm3
 ; SSE-NEXT: psllw $13, %xmm2
 ; SSE-NEXT: packsswb %xmm3, %xmm2
-; SSE-NEXT: pmovmskb %xmm2, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: cmpl $-1, %ecx
+; SSE-NEXT: pand %xmm0, %xmm2
+; SSE-NEXT: pmovmskb %xmm2, %eax
+; SSE-NEXT: cmpw $-1, %ax
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
@@ -3091,15 +3015,13 @@
 ; AVX1-NEXT: vpsllw $13, %xmm2, %xmm2
 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
-; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsllw $13, %xmm2, %xmm2
 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
-; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; AVX1-NEXT: shll $16, %ecx
-; AVX1-NEXT: orl %eax, %ecx
-; AVX1-NEXT: cmpl $-1, %ecx
+; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpmovmskb %xmm0, %eax
+; AVX1-NEXT: cmpw $-1, %ax
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -3128,9 +3050,8 @@
 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: cmpl $-1, %ecx
+; KNL-NEXT: andl %eax, %ecx
+; KNL-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
 ; KNL-NEXT: sete %al
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq
@@ -3152,33 +3073,31 @@
 define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
 ; SSE-LABEL: allzeros_v32i16_and4:
 ; SSE: # %bb.0:
+; SSE-NEXT: psllw $13, %xmm3
+; SSE-NEXT: psllw $13, %xmm2
+; SSE-NEXT: packsswb %xmm3, %xmm2
 ; SSE-NEXT: psllw $13, %xmm1
 ; SSE-NEXT: psllw $13, %xmm0
 ; SSE-NEXT: packsswb %xmm1, %xmm0
+; SSE-NEXT: por %xmm2, %xmm0
 ; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: psllw $13, %xmm3
-; SSE-NEXT: psllw $13, %xmm2
-; SSE-NEXT: packsswb %xmm3, %xmm2
-; SSE-NEXT: pmovmskb %xmm2, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: orl %eax, %ecx
+; SSE-NEXT: testl %eax, %eax
 ; SSE-NEXT: sete %al
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: allzeros_v32i16_and4:
 ; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsllw $13, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
+; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vpsllw $13, %xmm2, %xmm2
 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
-; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
-; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
-; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; AVX1-NEXT: shll $16, %ecx
-; AVX1-NEXT: orl %eax, %ecx
+; AVX1-NEXT: testl %eax, %eax
 ; AVX1-NEXT: sete %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -3196,19 +3115,15 @@
 ;
 ; KNL-LABEL: allzeros_v32i16_and4:
 ; KNL: # %bb.0:
-; KNL-NEXT: vpsllw $13, %ymm0, %ymm1
+; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; KNL-NEXT: vpsllw $13, %ymm1, %ymm1
 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
-; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
+; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: kortestw %k0, %k0
 ; KNL-NEXT: sete %al
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq
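
Illustrative sanity check (reviewer note, not part of the patch): a minimal standalone C++ sketch that exhaustively verifies the scalar equivalence the fold relies on, shown for 16-bit elements with 8-bit halves. The names X, Y and Lower are local to this sketch.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Lower-half mask: ~UpperBits for a 16-bit element with an 8-bit half.
  const uint32_t Lower = 0x00FF;
  // Precondition of the fold: the upper half of x is known zero.
  for (uint32_t X = 0; X <= 0xFF; ++X) {
    for (uint32_t Y = 0; Y <= 0xFF; ++Y) {
      uint16_t Or = (uint16_t)(X | (Y << 8));
      // CmpZero:   (x | (y << 8)) ==  0  <=>  (x | (y & Lower)) == 0
      assert((Or == 0x0000) == ((X | (Y & Lower)) == 0));
      // CmpNegOne: (x | (y << 8)) == -1  <=>  (x & (y & Lower)) == Lower
      assert((Or == 0xFFFF) == ((X & Y & Lower) == Lower));
    }
  }
  puts("scalar equivalence holds for all 16-bit cases");
  return 0;
}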