Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -34909,6 +34909,39 @@ if (SDValue V = scalarizeExtEltFP(N, DAG)) return V; + // Attempt to extract a i1 element by using MOVMSK to extract the signbits + // and then selecting the relevant element. + if (CIdx && SrcVT.getScalarType() == MVT::i1) { + SmallVector<SDNode *, 16> BoolExtracts; + auto IsBoolExtract = [&BoolExtracts](SDNode *Use) { + if (Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(Use->getOperand(1)) && + Use->getValueType(0) == MVT::i1) { + BoolExtracts.push_back(Use); + return true; + } + return false; + }; + if (all_of(InputVector->uses(), IsBoolExtract) && + BoolExtracts.size() > 1) { + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcElts); + if (SDValue BC = + combineBitcastvxi1(DAG, BCVT, InputVector, dl, Subtarget)) { + for (SDNode *Use : BoolExtracts) { + // extractelement vXi1 X, MaskIdx --> ((movmsk X) & Mask) == Mask + unsigned MaskIdx = Use->getConstantOperandVal(1); + APInt MaskBit = APInt::getOneBitSet(NumSrcElts, MaskIdx); + SDValue Mask = DAG.getConstant(MaskBit, dl, BCVT); + SDValue Res = DAG.getNode(ISD::AND, dl, BCVT, BC, Mask); + Res = DAG.getSetCC(dl, MVT::i1, Res, Mask, ISD::SETEQ); + DCI.CombineTo(Use, Res); + } + return SDValue(N, 0); + } + } + } + return SDValue(); } Index: test/CodeGen/X86/bitcast-vector-bool.ll =================================================================== --- test/CodeGen/X86/bitcast-vector-bool.ll +++ test/CodeGen/X86/bitcast-vector-bool.ll @@ -12,29 +12,18 @@ define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind { ; SSE2-SSSE3-LABEL: bitcast_v2i64_to_v2i1: ; SSE2-SSSE3: # %bb.0: -; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] -; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm0 -; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, 
%xmm2 -; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0 -; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE2-SSSE3-NEXT: por %xmm0, %xmm1 -; SSE2-SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al +; SSE2-SSSE3-NEXT: movmskpd %xmm0, %ecx +; SSE2-SSSE3-NEXT: movl %ecx, %eax +; SSE2-SSSE3-NEXT: shrb %al +; SSE2-SSSE3-NEXT: addb %cl, %al ; SSE2-SSSE3-NEXT: retq ; ; AVX12-LABEL: bitcast_v2i64_to_v2i1: ; AVX12: # %bb.0: -; AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX12-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX12-NEXT: vpextrb $0, %xmm0, %ecx -; AVX12-NEXT: vpextrb $8, %xmm0, %eax +; AVX12-NEXT: vmovmskpd %xmm0, %ecx +; AVX12-NEXT: movl %ecx, %eax +; AVX12-NEXT: shrb %al ; AVX12-NEXT: addb %cl, %al -; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: retq ; ; AVX512-LABEL: bitcast_v2i64_to_v2i1: Index: test/CodeGen/X86/bool-vector.ll =================================================================== --- test/CodeGen/X86/bool-vector.ll +++ test/CodeGen/X86/bool-vector.ll @@ -94,45 +94,14 @@ ; ; X32-SSE2-LABEL: PR15215_good: ; X32-SSE2: # %bb.0: # %entry -; X32-SSE2-NEXT: pushl %esi -; X32-SSE2-NEXT: .cfi_def_cfa_offset 8 -; X32-SSE2-NEXT: .cfi_offset %esi, -8 -; X32-SSE2-NEXT: movd %xmm0, %eax -; X32-SSE2-NEXT: andl $1, %eax -; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; X32-SSE2-NEXT: movd %xmm1, %ecx -; X32-SSE2-NEXT: andl $1, %ecx -; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X32-SSE2-NEXT: movd %xmm1, %edx -; X32-SSE2-NEXT: andl $1, %edx -; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] -; X32-SSE2-NEXT: movd %xmm0, %esi -; X32-SSE2-NEXT: andl $1, %esi -; X32-SSE2-NEXT: leal (%eax,%ecx,2), %eax -; X32-SSE2-NEXT: leal (%eax,%edx,4), %eax -; X32-SSE2-NEXT: leal (%eax,%esi,8), %eax -; 
X32-SSE2-NEXT: popl %esi -; X32-SSE2-NEXT: .cfi_def_cfa_offset 4 +; X32-SSE2-NEXT: pslld $31, %xmm0 +; X32-SSE2-NEXT: movmskps %xmm0, %eax ; X32-SSE2-NEXT: retl ; ; X32-AVX2-LABEL: PR15215_good: ; X32-AVX2: # %bb.0: # %entry -; X32-AVX2-NEXT: pushl %esi -; X32-AVX2-NEXT: .cfi_def_cfa_offset 8 -; X32-AVX2-NEXT: .cfi_offset %esi, -8 -; X32-AVX2-NEXT: vmovd %xmm0, %eax -; X32-AVX2-NEXT: andl $1, %eax -; X32-AVX2-NEXT: vpextrd $1, %xmm0, %ecx -; X32-AVX2-NEXT: andl $1, %ecx -; X32-AVX2-NEXT: vpextrd $2, %xmm0, %edx -; X32-AVX2-NEXT: andl $1, %edx -; X32-AVX2-NEXT: vpextrd $3, %xmm0, %esi -; X32-AVX2-NEXT: andl $1, %esi -; X32-AVX2-NEXT: leal (%eax,%ecx,2), %eax -; X32-AVX2-NEXT: leal (%eax,%edx,4), %eax -; X32-AVX2-NEXT: leal (%eax,%esi,8), %eax -; X32-AVX2-NEXT: popl %esi -; X32-AVX2-NEXT: .cfi_def_cfa_offset 4 +; X32-AVX2-NEXT: vpslld $31, %xmm0, %xmm0 +; X32-AVX2-NEXT: vmovmskps %xmm0, %eax ; X32-AVX2-NEXT: retl ; ; X64-LABEL: PR15215_good: @@ -152,35 +121,14 @@ ; ; X64-SSE2-LABEL: PR15215_good: ; X64-SSE2: # %bb.0: # %entry -; X64-SSE2-NEXT: movd %xmm0, %eax -; X64-SSE2-NEXT: andl $1, %eax -; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; X64-SSE2-NEXT: movd %xmm1, %ecx -; X64-SSE2-NEXT: andl $1, %ecx -; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X64-SSE2-NEXT: movd %xmm1, %edx -; X64-SSE2-NEXT: andl $1, %edx -; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] -; X64-SSE2-NEXT: movd %xmm0, %esi -; X64-SSE2-NEXT: andl $1, %esi -; X64-SSE2-NEXT: leal (%rax,%rcx,2), %eax -; X64-SSE2-NEXT: leal (%rax,%rdx,4), %eax -; X64-SSE2-NEXT: leal (%rax,%rsi,8), %eax +; X64-SSE2-NEXT: pslld $31, %xmm0 +; X64-SSE2-NEXT: movmskps %xmm0, %eax ; X64-SSE2-NEXT: retq ; ; X64-AVX2-LABEL: PR15215_good: ; X64-AVX2: # %bb.0: # %entry -; X64-AVX2-NEXT: vmovd %xmm0, %eax -; X64-AVX2-NEXT: andl $1, %eax -; X64-AVX2-NEXT: vpextrd $1, %xmm0, %ecx -; X64-AVX2-NEXT: andl $1, %ecx -; X64-AVX2-NEXT: vpextrd $2, %xmm0, %edx -; X64-AVX2-NEXT: andl $1, %edx -; X64-AVX2-NEXT: 
vpextrd $3, %xmm0, %esi -; X64-AVX2-NEXT: andl $1, %esi -; X64-AVX2-NEXT: leal (%rax,%rcx,2), %eax -; X64-AVX2-NEXT: leal (%rax,%rdx,4), %eax -; X64-AVX2-NEXT: leal (%rax,%rsi,8), %eax +; X64-AVX2-NEXT: vpslld $31, %xmm0, %xmm0 +; X64-AVX2-NEXT: vmovmskps %xmm0, %eax ; X64-AVX2-NEXT: retq entry: %0 = trunc <4 x i32> %input to <4 x i1> Index: test/CodeGen/X86/movmsk-cmp.ll =================================================================== --- test/CodeGen/X86/movmsk-cmp.ll +++ test/CodeGen/X86/movmsk-cmp.ll @@ -4273,19 +4273,31 @@ ; SSE2-LABEL: movmsk_v16i8: ; SSE2: # %bb.0: ; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: xorb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb -{{[0-9]+}}(%rsp), %al +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: movl %eax, %ecx +; SSE2-NEXT: shrl $15, %ecx +; SSE2-NEXT: movl %eax, %edx +; SSE2-NEXT: shrl $8, %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: andl $8, %eax +; SSE2-NEXT: shrl $3, %eax +; SSE2-NEXT: xorl %edx, %eax +; SSE2-NEXT: andl %ecx, %eax +; SSE2-NEXT: # kill: def $al killed $al killed $eax ; SSE2-NEXT: retq ; ; AVX-LABEL: movmsk_v16i8: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $3, %xmm0, %eax -; AVX-NEXT: vpextrb $8, %xmm0, %ecx -; AVX-NEXT: xorl %eax, %ecx -; AVX-NEXT: vpextrb $15, %xmm0, %eax +; AVX-NEXT: vpmovmskb %xmm0, %eax +; AVX-NEXT: movl %eax, %ecx +; AVX-NEXT: shrl $15, %ecx +; AVX-NEXT: movl %eax, %edx +; AVX-NEXT: shrl $8, %edx +; AVX-NEXT: andl $1, %edx +; AVX-NEXT: andl $8, %eax +; AVX-NEXT: shrl $3, %eax +; AVX-NEXT: xorl %edx, %eax ; AVX-NEXT: andl %ecx, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq @@ -4333,27 +4345,35 @@ ; SSE2-LABEL: movmsk_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: pcmpgtw %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %ecx -; SSE2-NEXT: pextrw $1, %xmm0, %edx -; SSE2-NEXT: pextrw $7, %xmm0, %esi -; SSE2-NEXT: pextrw $4, %xmm0, %eax -; SSE2-NEXT: andl 
%esi, %eax -; SSE2-NEXT: andl %edx, %eax -; SSE2-NEXT: andl %ecx, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax +; SSE2-NEXT: packsswb %xmm0, %xmm0 +; SSE2-NEXT: pmovmskb %xmm0, %ecx +; SSE2-NEXT: movl %ecx, %eax +; SSE2-NEXT: shrb $7, %al +; SSE2-NEXT: movl %ecx, %edx +; SSE2-NEXT: andb $16, %dl +; SSE2-NEXT: shrb $4, %dl +; SSE2-NEXT: andb %al, %dl +; SSE2-NEXT: movl %ecx, %eax +; SSE2-NEXT: shrb %al +; SSE2-NEXT: andb %dl, %al +; SSE2-NEXT: andb %cl, %al ; SSE2-NEXT: retq ; ; AVX-LABEL: movmsk_v8i16: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovd %xmm0, %ecx -; AVX-NEXT: vpextrw $1, %xmm0, %edx -; AVX-NEXT: vpextrw $7, %xmm0, %esi -; AVX-NEXT: vpextrw $4, %xmm0, %eax -; AVX-NEXT: andl %esi, %eax -; AVX-NEXT: andl %edx, %eax -; AVX-NEXT: andl %ecx, %eax -; AVX-NEXT: # kill: def $al killed $al killed $eax +; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpmovmskb %xmm0, %ecx +; AVX-NEXT: movl %ecx, %eax +; AVX-NEXT: shrb $7, %al +; AVX-NEXT: movl %ecx, %edx +; AVX-NEXT: andb $16, %dl +; AVX-NEXT: shrb $4, %dl +; AVX-NEXT: andb %al, %dl +; AVX-NEXT: movl %ecx, %eax +; AVX-NEXT: shrb %al +; AVX-NEXT: andb %dl, %al +; AVX-NEXT: andb %cl, %al ; AVX-NEXT: retq ; ; KNL-LABEL: movmsk_v8i16: @@ -4405,20 +4425,24 @@ ; SSE2-LABEL: movmsk_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] -; SSE2-NEXT: movd %xmm0, %ecx -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3] -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: xorl %ecx, %eax +; SSE2-NEXT: movmskps %xmm1, %eax +; SSE2-NEXT: movl %eax, %ecx +; SSE2-NEXT: shrb $3, %cl +; SSE2-NEXT: andb $4, %al +; SSE2-NEXT: shrb $2, %al +; SSE2-NEXT: xorb %cl, %al ; SSE2-NEXT: # kill: def $al killed $al killed $eax ; SSE2-NEXT: retq ; ; AVX-LABEL: movmsk_v4i32: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpextrd $2, %xmm0, %ecx -; AVX-NEXT: vpextrd $3, %xmm0, %eax -; AVX-NEXT: xorl %ecx, %eax +; AVX-NEXT: 
vmovmskps %xmm0, %eax +; AVX-NEXT: movl %eax, %ecx +; AVX-NEXT: shrb $3, %cl +; AVX-NEXT: andb $4, %al +; AVX-NEXT: shrb $2, %al +; AVX-NEXT: xorb %cl, %al ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -4461,11 +4485,10 @@ ; SSE2-NEXT: pand %xmm0, %xmm1 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %ecx -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: andl %ecx, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax +; SSE2-NEXT: movmskpd %xmm0, %ecx +; SSE2-NEXT: movl %ecx, %eax +; SSE2-NEXT: shrb %al +; SSE2-NEXT: andb %cl, %al ; SSE2-NEXT: retq ; ; AVX-LABEL: movmsk_v2i64: @@ -4473,10 +4496,10 @@ ; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrd $2, %xmm0, %ecx -; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: andl %ecx, %eax -; AVX-NEXT: # kill: def $al killed $al killed $eax +; AVX-NEXT: vmovmskpd %xmm0, %ecx +; AVX-NEXT: movl %ecx, %eax +; AVX-NEXT: shrb %al +; AVX-NEXT: andb %cl, %al ; AVX-NEXT: retq ; ; KNL-LABEL: movmsk_v2i64: @@ -4515,25 +4538,17 @@ ; SSE2-NEXT: cmpeqps %xmm1, %xmm2 ; SSE2-NEXT: cmpunordps %xmm1, %xmm0 ; SSE2-NEXT: orps %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE2-NEXT: movd %xmm1, %ecx -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE2-NEXT: movd %xmm1, %edx -; SSE2-NEXT: pextrw $6, %xmm0, %eax -; SSE2-NEXT: orl %edx, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax +; SSE2-NEXT: movmskps %xmm0, %eax +; SSE2-NEXT: testb $14, %al +; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; ; AVX-LABEL: movmsk_v4f32: ; AVX: # %bb.0: ; AVX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vextractps $1, %xmm0, %ecx -; AVX-NEXT: vextractps $2, %xmm0, %edx -; AVX-NEXT: vpextrb $12, %xmm0, %eax -; AVX-NEXT: orl %edx, %eax -; AVX-NEXT: orl %ecx, %eax -; AVX-NEXT: # kill: def $al killed 
$al killed $eax +; AVX-NEXT: vmovmskps %xmm0, %eax +; AVX-NEXT: testb $14, %al +; AVX-NEXT: setne %al ; AVX-NEXT: retq ; ; KNL-LABEL: movmsk_v4f32: @@ -4579,20 +4594,19 @@ ; SSE2-LABEL: movmsk_v2f64: ; SSE2: # %bb.0: ; SSE2-NEXT: cmplepd %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %ecx -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: andl %ecx, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax +; SSE2-NEXT: movmskpd %xmm1, %ecx +; SSE2-NEXT: movl %ecx, %eax +; SSE2-NEXT: shrb %al +; SSE2-NEXT: andb %cl, %al ; SSE2-NEXT: retq ; ; AVX-LABEL: movmsk_v2f64: ; AVX: # %bb.0: ; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vextractps $2, %xmm0, %ecx -; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: andl %ecx, %eax -; AVX-NEXT: # kill: def $al killed $al killed $eax +; AVX-NEXT: vmovmskpd %xmm0, %ecx +; AVX-NEXT: movl %ecx, %eax +; AVX-NEXT: shrb %al +; AVX-NEXT: andb %cl, %al ; AVX-NEXT: retq ; ; KNL-LABEL: movmsk_v2f64: @@ -4628,26 +4642,21 @@ ; SSE2-LABEL: PR39665_c_ray: ; SSE2: # %bb.0: ; SSE2-NEXT: cmpltpd %xmm0, %xmm1 -; SSE2-NEXT: movapd %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: testb $1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movl $42, %eax -; SSE2-NEXT: movl $99, %ecx -; SSE2-NEXT: cmovel %ecx, %eax -; SSE2-NEXT: testb $1, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movmskpd %xmm1, %eax +; SSE2-NEXT: cmpb $3, %al +; SSE2-NEXT: movl $42, %ecx +; SSE2-NEXT: movl $99, %eax ; SSE2-NEXT: cmovel %ecx, %eax ; SSE2-NEXT: retq ; ; AVX-LABEL: PR39665_c_ray: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %ecx -; AVX-NEXT: vpextrb $8, %xmm0, %eax -; AVX-NEXT: testb $1, %al -; AVX-NEXT: movl $42, %eax -; AVX-NEXT: movl $99, %edx -; AVX-NEXT: cmovel %edx, %eax -; AVX-NEXT: testb $1, %cl -; AVX-NEXT: cmovel %edx, %eax +; AVX-NEXT: vmovmskpd %xmm0, %eax +; AVX-NEXT: cmpb $3, %al +; AVX-NEXT: movl $42, %ecx +; AVX-NEXT: movl $99, %eax +; AVX-NEXT: cmovel %ecx, %eax ; AVX-NEXT: retq ; ; KNL-LABEL: PR39665_c_ray: