Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40912,7 +40912,52 @@
   return SDValue();
 }
 
+static SDValue combineMovmskPackss(SDNode *Movmsk, SelectionDAG &DAG,
+                                   const X86Subtarget &Subtarget) {
+  // Make sure we can replace a 128-bit MOVMSK with a 256-bit MOVMSK.
+  // TODO: It may be possible to ease the i32/i64 constraint to allow more
+  //       optimization with AVX1.
+  MVT MovmskVecVT = Movmsk->getOperand(0).getSimpleValueType();
+  if (!MovmskVecVT.is128BitVector())
+    return SDValue();
+  MVT EltVT = MovmskVecVT.getVectorElementType();
+  bool CanMovmsk256FP =
+      ((EltVT == MVT::f32 || EltVT == MVT::f64) && Subtarget.hasAVX()) ||
+      ((EltVT == MVT::i32 || EltVT == MVT::i64) && Subtarget.hasAVX2());
+  bool CanMovmsk_v32i8 = EltVT == MVT::i8 && Subtarget.hasAVX2();
+  if (!CanMovmsk256FP && !CanMovmsk_v32i8)
+    return SDValue();
+
+  // There has to be a PACKSS operand to this MOVMSK.
+  SDValue Packss = peekThroughBitcasts(Movmsk->getOperand(0));
+  if (Packss.getOpcode() != X86ISD::PACKSS)
+    return SDValue();
+
+  // The PACKSS must use 2 extracted halves of a common source vector.
+  SDValue Extract0 = peekThroughBitcasts(Packss.getOperand(0));
+  SDValue Extract1 = peekThroughBitcasts(Packss.getOperand(1));
+  if (Extract0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+      Extract1.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+    return SDValue();
+  SDValue X = Extract0.getOperand(0);
+  MVT XVT = X.getSimpleValueType();
+  if (X != Extract1.getOperand(0) || !XVT.is256BitVector())
+    return SDValue();
+  if (!isNullConstant(Extract0.getOperand(1)))
+    return SDValue();
+  auto *Index1C = dyn_cast<ConstantSDNode>(Extract1.getOperand(1));
+  if (!Index1C || Index1C->getZExtValue() != XVT.getVectorNumElements() / 2)
+    return SDValue();
+
+  // movmsk (packss (extract X, 0), (extract X, 2)) --> movmsk (bitcast X)
+  MVT VT256 = MVT::getVectorVT(EltVT, MovmskVecVT.getVectorNumElements() * 2);
+  SDValue CastX = DAG.getBitcast(VT256, X);
+  MVT VT = Movmsk->getSimpleValueType(0);
+  return DAG.getNode(X86ISD::MOVMSK, SDLoc(Movmsk), VT, CastX);
+}
+
 static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
+                             const X86Subtarget &Subtarget,
                              TargetLowering::DAGCombinerInfo &DCI) {
   SDValue Src = N->getOperand(0);
   MVT SrcVT = Src.getSimpleValueType();
@@ -40975,6 +41020,9 @@
     }
   }
 
+  if (SDValue V = combineMovmskPackss(N, DAG, Subtarget))
+    return V;
+
   return SDValue();
 }
 
@@ -42771,7 +42819,7 @@
   case X86ISD::FMSUBADD_RND:
   case X86ISD::FMADDSUB:
   case X86ISD::FMSUBADD:    return combineFMADDSUB(N, DAG, Subtarget);
-  case X86ISD::MOVMSK:      return combineMOVMSK(N, DAG, DCI);
+  case X86ISD::MOVMSK:      return combineMOVMSK(N, DAG, Subtarget, DCI);
   case X86ISD::MGATHER:
   case X86ISD::MSCATTER:
   case ISD::MGATHER:
Index: llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
===================================================================
--- llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
+++ llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
@@ -76,9 +76,7 @@
 ; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm1
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovmskps %xmm0, %eax
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -121,17 +119,27 @@
 ; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-SSSE3-NEXT:    retq
 ;
-; AVX12-LABEL: v4f64:
-; AVX12:       # %bb.0:
-; AVX12-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX12-NEXT:    vcmpltpd %ymm2, %ymm3, %ymm1
-; AVX12-NEXT:    vandpd %ymm1, %ymm0, %ymm0
-; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX12-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX12-NEXT:    vmovmskps %xmm0, %eax
-; AVX12-NEXT:    # kill: def $al killed $al killed $eax
-; AVX12-NEXT:    vzeroupper
-; AVX12-NEXT:    retq
+; AVX1-LABEL: v4f64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX1-NEXT:    vcmpltpd %ymm2, %ymm3, %ymm1
+; AVX1-NEXT:    vandpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovmskps %xmm0, %eax
+; AVX1-NEXT:    # kill: def $al killed $al killed $eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: v4f64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vcmpltpd %ymm2, %ymm3, %ymm1
+; AVX2-NEXT:    vandpd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    # kill: def $al killed $al killed $eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: v4f64:
 ; AVX512F:       # %bb.0:
@@ -194,9 +202,7 @@
 ; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm2, %ymm1
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
Index: llvm/test/CodeGen/X86/bitcast-setcc-256.ll
===================================================================
--- llvm/test/CodeGen/X86/bitcast-setcc-256.ll
+++ llvm/test/CodeGen/X86/bitcast-setcc-256.ll
@@ -31,9 +31,7 @@
 ; AVX2-LABEL: v16i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -384,9 +382,7 @@
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    movw %ax, (%rdi)
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
Index: llvm/test/CodeGen/X86/bitcast-setcc-512.ll
===================================================================
--- llvm/test/CodeGen/X86/bitcast-setcc-512.ll
+++ llvm/test/CodeGen/X86/bitcast-setcc-512.ll
@@ -109,13 +109,10 @@
 ;
 ; AVX2-LABEL: v16i32:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpackssdw %ymm0, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -170,13 +167,10 @@
 ;
 ; AVX2-LABEL: v16f32:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vcmpltps %ymm1, %ymm3, %ymm1
 ; AVX2-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
-; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpackssdw %ymm0, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -592,14 +586,11 @@
 ;
 ; AVX2-LABEL: bitcast_16i32_store:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm2, %ymm1
-; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm2, %ymm0
-; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm0, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    movw %ax, (%rdi)
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
Index: llvm/test/CodeGen/X86/movmsk-cmp.ll
===================================================================
--- llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -433,9 +433,7 @@
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    cmpw $-1, %ax
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
@@ -495,9 +493,7 @@
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    testw %ax, %ax
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
@@ -905,14 +901,11 @@
 ;
 ; AVX2-LABEL: allones_v16i32_sign:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm2, %ymm1
-; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm2, %ymm0
-; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm0, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    cmpw $-1, %ax
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
@@ -979,14 +972,11 @@
 ;
 ; AVX2-LABEL: allzeros_v16i32_sign:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm2, %ymm1
-; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm2, %ymm0
-; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm0, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    testw %ax, %ax
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
@@ -1856,9 +1846,7 @@
 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    cmpw $-1, %ax
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
@@ -2121,9 +2109,7 @@
 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    testw %ax, %ax
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
@@ -2411,16 +2397,12 @@
 ;
 ; AVX2-LABEL: allones_v16i32_and1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
-; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm0, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    cmpw $-1, %ax
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
@@ -2496,16 +2478,12 @@
 ;
 ; AVX2-LABEL: allzeros_v16i32_and1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
-; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm0, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    testw %ax, %ax
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
@@ -3455,9 +3433,7 @@
 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    cmpw $-1, %ax
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
@@ -3720,9 +3696,7 @@
 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    testw %ax, %ax
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
@@ -4010,16 +3984,12 @@
 ;
 ; AVX2-LABEL: allones_v16i32_and4:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
-; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm0, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    cmpw $-1, %ax
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
@@ -4095,16 +4065,12 @@
 ;
 ; AVX2-LABEL: allzeros_v16i32_and4:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
-; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm0, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    testw %ax, %ax
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
Index: llvm/test/CodeGen/X86/vector-compare-all_of.ll
===================================================================
--- llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -91,18 +91,29 @@
 ; SSE-NEXT:    cmovneq %rcx, %rax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: test_v4f64_legal_sext:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %eax
-; AVX-NEXT:    xorl %ecx, %ecx
-; AVX-NEXT:    cmpl $15, %eax
-; AVX-NEXT:    movq $-1, %rax
-; AVX-NEXT:    cmovneq %rcx, %rax
-; AVX-NEXT:    vzeroupper
-; AVX-NEXT:    retq
+; AVX1-LABEL: test_v4f64_legal_sext:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovmskps %xmm0, %eax
+; AVX1-NEXT:    xorl %ecx, %ecx
+; AVX1-NEXT:    cmpl $15, %eax
+; AVX1-NEXT:    movq $-1, %rax
+; AVX1-NEXT:    cmovneq %rcx, %rax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v4f64_legal_sext:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    xorl %ecx, %ecx
+; AVX2-NEXT:    cmpl $15, %eax
+; AVX2-NEXT:    movq $-1, %rax
+; AVX2-NEXT:    cmovneq %rcx, %rax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_v4f64_legal_sext:
 ; AVX512:       # %bb.0:
@@ -229,18 +240,29 @@
 ; SSE-NEXT:    cmovnel %ecx, %eax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: test_v8f32_legal_sext:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
-; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmovmskb %xmm0, %eax
-; AVX-NEXT:    xorl %ecx, %ecx
-; AVX-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
-; AVX-NEXT:    movl $-1, %eax
-; AVX-NEXT:    cmovnel %ecx, %eax
-; AVX-NEXT:    vzeroupper
-; AVX-NEXT:    retq
+; AVX1-LABEL: test_v8f32_legal_sext:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    xorl %ecx, %ecx
+; AVX1-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
+; AVX1-NEXT:    movl $-1, %eax
+; AVX1-NEXT:    cmovnel %ecx, %eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v8f32_legal_sext:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    xorl %ecx, %ecx
+; AVX2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
+; AVX2-NEXT:    movl $-1, %eax
+; AVX2-NEXT:    cmovnel %ecx, %eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_v8f32_legal_sext:
 ; AVX512:       # %bb.0:
@@ -389,9 +411,7 @@
 ; AVX2-LABEL: test_v4i64_legal_sext:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovmskps %xmm0, %eax
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
 ; AVX2-NEXT:    xorl %ecx, %ecx
 ; AVX2-NEXT:    cmpl $15, %eax
 ; AVX2-NEXT:    movq $-1, %rax
@@ -557,9 +577,7 @@
 ; AVX2-LABEL: test_v8i32_legal_sext:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    xorl %ecx, %ecx
 ; AVX2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
 ; AVX2-NEXT:    movl $-1, %eax
@@ -746,9 +764,7 @@
 ; AVX2-LABEL: test_v16i16_legal_sext:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    xorl %ecx, %ecx
 ; AVX2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
 ; AVX2-NEXT:    movl $-1, %eax
@@ -1506,9 +1522,7 @@
 ; AVX2-LABEL: bool_reduction_v16i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    xorl %ecx, %ecx
 ; AVX2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
 ; AVX2-NEXT:    movl $-1, %eax
Index: llvm/test/CodeGen/X86/vector-compare-any_of.ll
===================================================================
--- llvm/test/CodeGen/X86/vector-compare-any_of.ll
+++ llvm/test/CodeGen/X86/vector-compare-any_of.ll
@@ -90,18 +90,29 @@
 ; SSE-NEXT:    movslq %ecx, %rax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: test_v4f64_legal_sext:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %eax
-; AVX-NEXT:    xorl %ecx, %ecx
-; AVX-NEXT:    cmpl %eax, %ecx
-; AVX-NEXT:    sbbl %ecx, %ecx
-; AVX-NEXT:    movslq %ecx, %rax
-; AVX-NEXT:    vzeroupper
-; AVX-NEXT:    retq
+; AVX1-LABEL: test_v4f64_legal_sext:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovmskps %xmm0, %eax
+; AVX1-NEXT:    xorl %ecx, %ecx
+; AVX1-NEXT:    cmpl %eax, %ecx
+; AVX1-NEXT:    sbbl %ecx, %ecx
+; AVX1-NEXT:    movslq %ecx, %rax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v4f64_legal_sext:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    xorl %ecx, %ecx
+; AVX2-NEXT:    cmpl %eax, %ecx
+; AVX2-NEXT:    sbbl %ecx, %ecx
+; AVX2-NEXT:    movslq %ecx, %rax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_v4f64_legal_sext:
 ; AVX512:       # %bb.0:
@@ -223,17 +234,27 @@
 ; SSE-NEXT:    sbbl %eax, %eax
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: test_v8f32_legal_sext:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
-; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmovmskb %xmm0, %ecx
-; AVX-NEXT:    xorl %eax, %eax
-; AVX-NEXT:    cmpl %ecx, %eax
-; AVX-NEXT:    sbbl %eax, %eax
-; AVX-NEXT:    vzeroupper
-; AVX-NEXT:    retq
+; AVX1-LABEL: test_v8f32_legal_sext:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX1-NEXT:    xorl %eax, %eax
+; AVX1-NEXT:    cmpl %ecx, %eax
+; AVX1-NEXT:    sbbl %eax, %eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v8f32_legal_sext:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    cmpl %ecx, %eax
+; AVX2-NEXT:    sbbl %eax, %eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_v8f32_legal_sext:
 ; AVX512:       # %bb.0:
@@ -380,9 +401,7 @@
 ; AVX2-LABEL: test_v4i64_legal_sext:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovmskps %xmm0, %eax
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
 ; AVX2-NEXT:    xorl %ecx, %ecx
 ; AVX2-NEXT:    cmpl %eax, %ecx
 ; AVX2-NEXT:    sbbl %ecx, %ecx
@@ -541,9 +560,7 @@
 ; AVX2-LABEL: test_v8i32_legal_sext:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
 ; AVX2-NEXT:    xorl %eax, %eax
 ; AVX2-NEXT:    cmpl %ecx, %eax
 ; AVX2-NEXT:    sbbl %eax, %eax
@@ -724,9 +741,7 @@
 ; AVX2-LABEL: test_v16i16_legal_sext:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
 ; AVX2-NEXT:    xorl %eax, %eax
 ; AVX2-NEXT:    cmpl %ecx, %eax
 ; AVX2-NEXT:    sbbl %eax, %eax
@@ -1494,9 +1509,7 @@
 ; AVX2-LABEL: bool_reduction_v16i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
 ; AVX2-NEXT:    xorl %eax, %eax
 ; AVX2-NEXT:    cmpl %ecx, %eax
 ; AVX2-NEXT:    sbbl %eax, %eax
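
For reference, a minimal IR sketch of the pattern these tests exercise
(reduced from the v4f64 cases above; the function name is illustrative and
not part of this patch). Compiled with llc -mtriple=x86_64-unknown-unknown
-mattr=+avx2, the compare-mask bitcast below previously lowered to
vextractf128 + vpackssdw + a 128-bit vmovmskps, and with this combine it
selects a single 256-bit vmovmskps:

define i4 @movmsk_v4f64(<4 x double> %a, <4 x double> %b) {
  %cmp = fcmp olt <4 x double> %a, %b
  %mask = bitcast <4 x i1> %cmp to i4
  ret i4 %mask
}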